From b35056c15b2a35426a82f75171e7aa80c5175851 Mon Sep 17 00:00:00 2001
From: Alessandro Tempia Calvino <44085674+aletempiac@users.noreply.github.com>
Date: Fri, 6 Oct 2023 09:43:59 +0200
Subject: [PATCH] Extending technology mapping and rewriting (#623)

* Adding emap and dependencies

* Adding tests cases, minor fixes

* Adding tests for struct library

* Adding don't care computation in mapping

* Adding don't cares in rewriting

* Improving efficiency in mapping and rewriting with DC

* Fixing bugs in emap, adding multioutput network representation and writer for emap, adding an adder extraction algorithm, adding new standard cell view for multioutput gates, adding utils to decompose multioutput cells into single output for verification reasons, and adding test cases

* Bug fix: possible delay increase when partially-dangling multi-output cells are deselected

* Finishing the support for white_boxes cloning in emap, solving incompatibility between klut_network and choice_view, adding tests for emap with white_boxes

* Adding documentation, changing name of map_adders to extract_adders

* Solving compilation issues, clang formatting, and updating workflows

* Updating block clone_node and relative test

* Changelog update

* Fixes in struct library

* Fixing conversion uint64 into uint32

* Fixing bugs in cuts (possible buffer overflow) and in struct_library

* Date update in emap file
---
 .github/workflows/linux.yml                   |    6 +-
 docs/algorithms/extract_adders.rst            |    6 +
 docs/algorithms/index_information.rst         |    3 +-
 docs/algorithms/mapper.rst                    |   96 +
 docs/algorithms/rewrite.rst                   |   28 +-
 docs/changelog.rst                            |    9 +
 docs/implementations.rst                      |   50 +
 docs/utils/util_data_structures.rst           |   20 +-
 docs/views.rst                                |   20 +-
 experiments/emap.cpp                          |  123 +
 .../experimental/decompose_multioutput.hpp    |  398 ++
 .../algorithms/experimental/emap.hpp          | 5398 +++++++++++++++++
 .../mockturtle/algorithms/extract_adders.hpp  |  971 +++
 include/mockturtle/algorithms/mapper.hpp      |  163 +-
 include/mockturtle/algorithms/rewrite.hpp     |  253 +-
 include/mockturtle/networks/block.hpp         |  976 +++
 include/mockturtle/networks/klut.hpp          |   15 +-
 include/mockturtle/networks/storage.hpp       |   14 +
 include/mockturtle/traits.hpp                 |  107 +-
 include/mockturtle/utils/algorithm.hpp        |   85 +
 include/mockturtle/utils/cuts.hpp             |    9 +-
 .../mockturtle/utils/include/supergate.hpp    |   92 +
 include/mockturtle/utils/standard_cell.hpp    |   98 +
 include/mockturtle/utils/struct_library.hpp   | 1470 +++++
 include/mockturtle/utils/super_utils.hpp      |  136 +-
 include/mockturtle/utils/tech_library.hpp     |  736 ++-
 include/mockturtle/utils/window_utils.hpp     |   45 +-
 include/mockturtle/views/cell_view.hpp        |  297 +
 include/mockturtle/views/choice_view.hpp      |    6 +-
 include/mockturtle/views/dont_touch_view.hpp  |  142 +
 lib/kitty/kitty/npn.hpp                       |  381 ++
 test/algorithms/experimental/emap.cpp         |  683 +++
 test/algorithms/extract_adders.cpp            |  146 +
 test/networks/block.cpp                       |  437 ++
 test/utils/struct_library.cpp                 |  220 +
 test/utils/super_utils.cpp                    |  227 +-
 test/utils/tech_library.cpp                   |  683 ++-
 test/views/cell_view.cpp                      |  202 +
 test/views/dont_touch_view.cpp                |  192 +
 39 files changed, 14744 insertions(+), 199 deletions(-)
 create mode 100644 docs/algorithms/extract_adders.rst
 create mode 100644 experiments/emap.cpp
 create mode 100644 include/mockturtle/algorithms/experimental/decompose_multioutput.hpp
 create mode 100644 include/mockturtle/algorithms/experimental/emap.hpp
 create mode 100644 include/mockturtle/algorithms/extract_adders.hpp
 create mode 100644 include/mockturtle/networks/block.hpp
 create mode 100644 include/mockturtle/utils/include/supergate.hpp
 create mode 100644 include/mockturtle/utils/standard_cell.hpp
 create mode 100644 include/mockturtle/utils/struct_library.hpp
 create mode 100644 include/mockturtle/views/cell_view.hpp
 create mode 100644 include/mockturtle/views/dont_touch_view.hpp
 create mode 100644 test/algorithms/experimental/emap.cpp
 create mode 100644 test/algorithms/extract_adders.cpp
 create mode 100644 test/networks/block.cpp
 create mode 100644 test/utils/struct_library.cpp
 create mode 100644 test/views/cell_view.cpp
 create mode 100644 test/views/dont_touch_view.cpp

diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 6a8c8e8ab..8b30685f7 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -99,9 +99,9 @@ jobs:
       run: |
         cd build
         ./test/run_tests "~[quality]"
-  build-clang12:
+  build-clang13:
     runs-on: ubuntu-latest
-    name: Clang 12
+    name: Clang 13
     
     steps:
     - uses: actions/checkout@v1
@@ -111,7 +111,7 @@ jobs:
       run: |
         mkdir build
         cd build
-        cmake -DCMAKE_CXX_COMPILER=clang++-12 -DMOCKTURTLE_TEST=ON ..
+        cmake -DCMAKE_CXX_COMPILER=clang++-13 -DMOCKTURTLE_TEST=ON ..
         make run_tests
     - name: Run tests
       run: |
diff --git a/docs/algorithms/extract_adders.rst b/docs/algorithms/extract_adders.rst
new file mode 100644
index 000000000..2b62754eb
--- /dev/null
+++ b/docs/algorithms/extract_adders.rst
@@ -0,0 +1,6 @@
+Extract adders
+--------------
+
+**Header:** ``mockturtle/algorithms/extract_adders.hpp``
+
+.. doxygenfunction:: mockturtle::extract_adders
diff --git a/docs/algorithms/index_information.rst b/docs/algorithms/index_information.rst
index 9ee13760b..e67822fde 100644
--- a/docs/algorithms/index_information.rst
+++ b/docs/algorithms/index_information.rst
@@ -9,4 +9,5 @@ Network information extraction
    dont_cares
    cut_enumeration
    reconv_cut
-   extract_linear
\ No newline at end of file
+   extract_linear
+   extract_adders
\ No newline at end of file
diff --git a/docs/algorithms/mapper.rst b/docs/algorithms/mapper.rst
index cdd690395..9d4441229 100644
--- a/docs/algorithms/mapper.rst
+++ b/docs/algorithms/mapper.rst
@@ -98,6 +98,26 @@ database of structures:
    ps.required_time = std::numeric_limits<double>::max();
    sequential<mig_network> res = map( aig, exact_lib, ps );
 
+The newest version of `map` for graph mapping or rewriting can
+leverage satisfiability don't cares:
+
+.. code-block:: c++
+
+   aig_network aig = ...;
+   
+   /* load the npn database in the library and compute don't care classes */
+   mig_npn_resynthesis resyn{ true };
+   exact_library_params lps;
+   lps.compute_dc_classes = true;
+   exact_library<mig_network, mig_npn_resynthesis> exact_lib( resyn, lps );
+
+   /* perform area-oriented rewriting */
+   map_params ps;
+   ps.skip_delay_round = true;
+   ps.required_time = std::numeric_limits<double>::max();
+   ps.use_dont_cares = true;
+   mig_network res = map( aig, exact_lib, ps );
+
 As a default setting, cut enumeration minimizes the truth tables.
 This helps improving the results but slows down the computation.
 We suggest to keep it always true. Anyhow, for a faster mapping,
@@ -117,3 +137,79 @@ To increase this limit, change `max_cut_num` in `fast_network_cuts`.
 
 .. doxygenfunction:: mockturtle::map(Ntk const&, tech_library<NInputs, Configuration> const&, map_params const&, map_stats*)
 .. doxygenfunction:: mockturtle::map(Ntk&, exact_library<NtkDest, RewritingFn, NInputs> const&, map_params const&, map_stats*)
+
+
+
+Extended technology mapping
+---------------------------
+
+**Header:** ``mockturtle/algorithms/experimental/emap.hpp``
+
+The command `emap` stands for extended mapper. The current version
+supports up to 2-output gates, such as full adders and half adders,
+and it provides a 2x speedup in mapping time compared to command `map`
+for similar or better quality. Similarly to `map`, the implementation
+is independent of the underlying graph representation. Moreover, `emap`
+supports "don't touch" white boxes.
+
+The following example shows how to perform delay-oriented technology mapping
+from an and-inverter graph using the default settings:
+
+.. code-block:: c++
+
+   aig_network aig = ...;
+
+   /* read cell library in genlib format */
+   std::vector<gate> gates;
+   std::ifstream in( ... );
+   lorina::read_genlib( in, genlib_reader( gates ) );
+   tech_library tech_lib( gates );
+
+   /* perform technology mapping */
+   binding_view<klut_network> res = emap( aig, tech_lib );
+
+The mapped network is returned as a `binding_view` that extends a k-LUT network.
+Each k-LUT abstracts a cell and the view contains the binding information.
+
+The next example performs area-oriented graph mapping using multi-output cells:
+
+.. code-block:: c++
+
+   aig_network aig = ...;
+
+   /* read cell library in genlib format */
+   std::vector<gate> gates;
+   std::ifstream in( ... );
+   lorina::read_genlib( in, genlib_reader( gates ) );
+   tech_library tech_lib( gates );
+
+   /* perform technology mapping */
+   emap_params ps;
+   ps.area_oriented_mapping = true;
+   ps.map_multioutput = true;
+   cell_view<block_network> res = emap_block( aig, tech_lib, ps );
+
+In this case, `emap_block` is used to return a `block_network`, which can represent multi-output
+cells as single nodes. Alternatively, `emap` can also be used, but multi-output cells
+would be represented by single-output nodes.
+
+The maximum number of cuts stored for each node is limited to 32.
+To increase this limit, change `max_cut_num` in `emap`.
+
+For further details and usage scenarios of `emap`, such as white boxes, please check the
+related tests.
+
+**Parameters and statistics**
+
+.. doxygenstruct:: mockturtle::emap_params
+   :members:
+
+.. doxygenstruct:: mockturtle::emap_stats
+   :members:
+
+**Algorithm**
+
+.. doxygenfunction:: mockturtle::emap(Ntk const&, tech_library<NInputs, Configuration> const&, emap_params const&, emap_stats*)
+.. doxygenfunction:: mockturtle::emap_block(Ntk const&, tech_library<NInputs, Configuration> const&, emap_params const&, emap_stats*)
+.. doxygenfunction:: mockturtle::emap_node_map(Ntk const&, tech_library<NInputs, Configuration> const&, emap_params const&, emap_stats*)
+.. doxygenfunction:: mockturtle::emap_load_mapping(Ntk&)
\ No newline at end of file
diff --git a/docs/algorithms/rewrite.rst b/docs/algorithms/rewrite.rst
index 01b838272..d1a88bda6 100644
--- a/docs/algorithms/rewrite.rst
+++ b/docs/algorithms/rewrite.rst
@@ -12,14 +12,11 @@ networks.  In this case the maximum number of variables for a node function is
    /* derive some MIG */
    mig_network mig = ...;
 
-   /* node resynthesis */
-   mig_npn_resynthesis resyn;
+   /* rewrite */
+   mig_npn_resynthesis resyn{ true };
    exact_library_params eps;
    eps.np_classification = false;
-   exact_library<xag_network, decltype( resyn )> exact_lib( resyn, eps );
-
-   /* rewrite */
-   rewrite( mig, exact_lib );
+   exact_library<mig_network, decltype( resyn )> exact_lib( resyn, eps );
 
 It is possible to change the cost function of nodes in rewrite.  Here is
 an example, in which the cost function only accounts for AND gates in a network,
@@ -42,6 +39,25 @@ which corresponds to the multiplicative complexity of a function.
    exact_library<xag_network, decltype( resyn )> exact_lib( resyn, eps );
    rewrite<decltype( Ntk ), decltype( exact_lib ), mc_cost>( ntk, exact_lib );
 
+Rewrite also supports satisfiability don't cares:
+
+.. code-block:: c++
+   
+   /* derive some MIG */
+   mig_network mig = ...;
+
+   /* rewrite */
+   mig_npn_resynthesis resyn{ true };
+   exact_library_params eps;
+   eps.np_classification = false;
+   eps.compute_dc_classes = true;
+   exact_library<mig_network, decltype( resyn )> exact_lib( resyn, eps );
+
+   /* rewrite */
+   rewrite_params ps;
+   ps.use_dont_cares = true;
+   rewrite( mig, exact_lib, ps );
+
 Parameters and statistics
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 14567271b..a1fbc84aa 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -16,6 +16,7 @@ v0.4 (not yet released)
     - Generic network implementation with additional node types (`generic_network`) `#594 <https://github.com/lsils/mockturtle/pull/594>`_
     - Adding `substitute_node_no_restrash` to `aig_network`, `xag_network`, `mig_network`, `xmg_network`, and `fanout_view` to substitute nodes without structural hashing and simplifications `#616 <https://github.com/lsils/mockturtle/pull/616>`_
     - Adding `replace_in_node_no_restrash` to `aig_network`, `xag_network`, `mig_network`, and `xmg_network` to replace a fanin without structural hashing and simplifications `#616 <https://github.com/lsils/mockturtle/pull/616>`_
+    - Adding a new network type to represent multi-output gates (`block_network`) `#623 <https://github.com/lsils/mockturtle/pull/623>`_
 * Algorithms:
     - AIG balancing (`aig_balance`) `#580 <https://github.com/lsils/mockturtle/pull/580>`_
     - Cost-generic resubstitution (`cost_generic_resub`) `#554 <https://github.com/lsils/mockturtle/pull/554>`_
@@ -33,6 +34,9 @@ v0.4 (not yet released)
     - Extensions and fixes in refactoring (`refactoring`) `#607 <https://github.com/lsils/mockturtle/pull/607>`_
     - Improving LUT mapping, changing its interface, and integrating SOP/ESOP balancing (`lut_map`) `#616 <https://github.com/lsils/mockturtle/pull/616>`_
     - Adding LUT-based SOP and ESOP balancing (`sop_balancing`, `esop_balancing`) `#616 <https://github.com/lsils/mockturtle/pull/616>`_
+    - Adding a new technology mapper supporting multi-output cells (`emap`) `#623 <https://github.com/lsils/mockturtle/pull/623>`_
+    - Adding circuit extraction of half and full adders (`extract_adders`) `#623 <https://github.com/lsils/mockturtle/pull/623>`_
+    - Adding don't care support in rewriting (`map`, `rewrite`) `#623 <https://github.com/lsils/mockturtle/pull/623>`_
 * I/O:
     - Write gates to GENLIB file (`write_genlib`) `#606 <https://github.com/lsils/mockturtle/pull/606>`_
 * Views:
@@ -42,11 +46,16 @@ v0.4 (not yet released)
     - Choice view for management of equivalent classes (`choice_view`) `#594 <https://github.com/lsils/mockturtle/pull/594>`_
     - Deterministic randomization option in topological sorting (`topo_view`) `#594 <https://github.com/lsils/mockturtle/pull/594>`_
     - Fixing MFFC view (`mffc_view`) `#607 <https://github.com/lsils/mockturtle/pull/607>`_
+    - Adding a view to represent standard cells including the multi-output ones (`cell_view`) `#623 <https://github.com/lsils/mockturtle/pull/623>`_
+    - Adding a view to mark nodes as don't touch elements (`dont_touch_view`) `#623 <https://github.com/lsils/mockturtle/pull/623>`_
 * Properties:
     - Cost functions based on the factored form literals count (`factored_literal_cost`) `#579 <https://github.com/lsils/mockturtle/pull/579>`_
 * Utils:
     - Add recursive cost function class to customize cost in resubstitution algorithm (`recursive_cost_function`) `#554 <https://github.com/lsils/mockturtle/pull/554>`_
     - Sum-of-products factoring utilities `#579 <https://github.com/lsils/mockturtle/pull/579>`_
+    - Adding utils to perform pattern matching and derive patterns from standard cells (`struct_library`) `#623 <https://github.com/lsils/mockturtle/pull/623>`_
+    - Adding Boolean matching for multi-output cells (`tech_library`) `#623 <https://github.com/lsils/mockturtle/pull/623>`_
+    - Adding Boolean matching with don't cares for databases (`exact_library`) `#623 <https://github.com/lsils/mockturtle/pull/623>`_
 
 v0.3 (July 12, 2022)
 --------------------
diff --git a/docs/implementations.rst b/docs/implementations.rst
index 4c6a36279..df996c17f 100644
--- a/docs/implementations.rst
+++ b/docs/implementations.rst
@@ -270,6 +270,32 @@ All network implementations are located in `mockturtle/networks/`:
 Supplementary network types
 ---------------------------
 
+Block Network
+~~~~~~~~~~~~~
+
+**Header:** ``mockturtle/networks/block.hpp``
+
+This header file defines a data structure of type `block_network`, which is primarily designed to
+represent both single-output and multi-output nodes.
+This data structure provides additional methods to create multi-output nodes and access
+the individual pins.
+
+Additional interfaces provided by this network type include:
+
+.. doxygenfunction:: mockturtle::block_network::is_multioutput
+.. doxygenfunction:: mockturtle::block_network::create_ha
+.. doxygenfunction:: mockturtle::block_network::create_hai
+.. doxygenfunction:: mockturtle::block_network::create_fa
+.. doxygenfunction:: mockturtle::block_network::create_fai
+.. doxygenfunction:: mockturtle::block_network::num_outputs
+.. doxygenfunction:: mockturtle::block_network::incr_fanout_size_pin
+.. doxygenfunction:: mockturtle::block_network::decr_fanout_size_pin
+.. doxygenfunction:: mockturtle::block_network::fanout_size_pin
+.. doxygenfunction:: mockturtle::block_network::node_function_pin
+.. doxygenfunction:: mockturtle::block_network::get_output_pin
+.. doxygenfunction:: mockturtle::block_network::next_output_pin
+
+
 Cover Network
 ~~~~~~~~~~~~~
 
@@ -334,3 +360,27 @@ Specific for `buffered_crossed_klut_network`:
 **Simulation of buffered networks**
 
 .. doxygenfunction:: mockturtle::simulate_buffered
+
+
+Generic Network
+~~~~~~~~~~~~~~~
+
+**Header:** ``mockturtle/networks/generic.hpp``
+
+This header file defines a data structure of type `generic_network`, which is primarily designed to
+represent different node types, such as white and black boxes, registers, input or output box pins.
+This data structure represents all the elements as nodes, including POs.
+
+Additional interfaces provided by this network type include:
+
+.. doxygenfunction:: mockturtle::generic_network::is_node
+.. doxygenfunction:: mockturtle::generic_network::is_register
+.. doxygenfunction:: mockturtle::generic_network::is_box_input
+.. doxygenfunction:: mockturtle::generic_network::is_box_output
+.. doxygenfunction:: mockturtle::generic_network::create_box_input
+.. doxygenfunction:: mockturtle::generic_network::create_box_output
+.. doxygenfunction:: mockturtle::generic_network::create_register
+.. doxygenfunction:: mockturtle::generic_network::foreach_register
+.. doxygenfunction:: mockturtle::generic_network::clear_values2
+.. doxygenfunction:: mockturtle::generic_network::value2
+.. doxygenfunction:: mockturtle::generic_network::set_value2
diff --git a/docs/utils/util_data_structures.rst b/docs/utils/util_data_structures.rst
index 9648ea8bc..1e5850112 100644
--- a/docs/utils/util_data_structures.rst
+++ b/docs/utils/util_data_structures.rst
@@ -54,7 +54,7 @@ validity tags to trade efficiency with memory.
 
 .. doxygenfunction:: mockturtle::initialize_copy_network
 
-Tech Library
+Tech library
 ~~~~~~~~~~~~
 
 **Header:** ``mockturtle/utils/tech_library.hpp``
@@ -72,7 +72,7 @@ Tech Library
 
 .. _exact_library:
 
-Exact Library
+Exact library
 ~~~~~~~~~~~~~
 
 **Header:** ``mockturtle/utils/tech_library.hpp``
@@ -101,6 +101,22 @@ Supergates utils
 .. doxygenclass:: mockturtle::super_utils
    :members:
 
+Struct library
+~~~~~~~~~~~~~~
+
+**Header:** ``mockturtle/utils/struct_library.hpp``
+
+.. doc_overview_table:: classmockturtle_1_1struct__library
+   :column: Method
+
+   get_struct_library
+   get_pattern_id
+   get_supergates_pattern
+   print_and_table
+
+.. doxygenclass:: mockturtle::struct_library
+   :members:
+
 Cuts
 ~~~~
 
diff --git a/docs/views.rst b/docs/views.rst
index 4c166fdc1..55c1810ec 100644
--- a/docs/views.rst
+++ b/docs/views.rst
@@ -84,14 +84,22 @@ algorithm.  Several views are implemented in mockturtle.
 .. doxygenclass:: mockturtle::window_view
    :members:
 
-`binding_view`: Add bindings to a technology library
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+`binding_view`: Add bindings from a technology library
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 **Header:** ``mockturtle/views/binding_view.hpp``
 
 .. doxygenclass:: mockturtle::binding_view
    :members:
 
+`cell_view`: Add cell mappings from a technology library
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Header:** ``mockturtle/views/cell_view.hpp``
+
+.. doxygenclass:: mockturtle::cell_view
+   :members:
+
 `names_view`: Assign names to signals and outputs
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -108,6 +116,14 @@ algorithm.  Several views are implemented in mockturtle.
 .. doxygenclass:: mockturtle::names_view
    :members:
 
+`dont_touch_view`: Mark nodes as "don't touch"
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+**Header:** ``mockturtle/views/dont_touch_view.hpp``
+
+.. doxygenclass:: mockturtle::dont_touch_view
+   :members:
+
 `cnf_view`: Creates a CNF while creating a network
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/experiments/emap.cpp b/experiments/emap.cpp
new file mode 100644
index 000000000..0a5c62f0a
--- /dev/null
+++ b/experiments/emap.cpp
@@ -0,0 +1,123 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2023  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <string>
+#include <vector>
+
+#include <fmt/format.h>
+#include <lorina/aiger.hpp>
+#include <lorina/genlib.hpp>
+#include <mockturtle/algorithms/experimental/decompose_multioutput.hpp>
+#include <mockturtle/algorithms/experimental/emap.hpp>
+#include <mockturtle/io/aiger_reader.hpp>
+#include <mockturtle/io/genlib_reader.hpp>
+#include <mockturtle/networks/aig.hpp>
+#include <mockturtle/networks/block.hpp>
+#include <mockturtle/networks/klut.hpp>
+#include <mockturtle/utils/tech_library.hpp>
+#include <mockturtle/views/cell_view.hpp>
+#include <mockturtle/views/depth_view.hpp>
+
+#include <experiments.hpp>
+
+std::string const mcnc_library = "GATE   inv1    1  O=!a;             PIN * INV 1 999 0.9 0.3 0.9 0.3\n" /* genlib text that main() feeds to lorina::read_genlib */
+                                 "GATE   inv2    2  O=!a;             PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                 "GATE   inv3    3  O=!a;             PIN * INV 3 999 1.1 0.09 1.1 0.09\n"
+                                 "GATE   inv4    4  O=!a;             PIN * INV 4 999 1.2 0.07 1.2 0.07\n"
+                                 "GATE   nand2   2  O=!(a*b);         PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                 "GATE   nand3   3  O=!(a*b*c);       PIN * INV 1 999 1.1 0.3 1.1 0.3\n"
+                                 "GATE   nand4   4  O=!(a*b*c*d);     PIN * INV 1 999 1.4 0.4 1.4 0.4\n"
+                                 "GATE   nor2    2  O=!(a+b);         PIN * INV 1 999 1.4 0.5 1.4 0.5\n"
+                                 "GATE   nor3    3  O=!(a+b+c);       PIN * INV 1 999 2.4 0.7 2.4 0.7\n"
+                                 "GATE   nor4    4  O=!(a+b+c+d);     PIN * INV 1 999 3.8 1.0 3.8 1.0\n"
+                                 "GATE   and2    3  O=a*b;            PIN * NONINV 1 999 1.9 0.3 1.9 0.3\n"
+                                 "GATE   or2     3  O=a+b;            PIN * NONINV 1 999 2.4 0.3 2.4 0.3\n"
+                                 "GATE   xor2a   5  O=a*!b+!a*b;      PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                 "#GATE  xor2b   5  O=!(a*b+!a*!b);   PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                 "GATE   xnor2a  5  O=a*b+!a*!b;      PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
+                                 "#GATE  xnor2b  5  O=!(a*!b+!a*b);   PIN * UNKNOWN 2 999 2.1 0.5 2.1 0.5\n"
+                                 "GATE   aoi21   3  O=!(a*b+c);       PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                 "GATE   aoi22   4  O=!(a*b+c*d);     PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                 "GATE   oai21   3  O=!((a+b)*c);     PIN * INV 1 999 1.6 0.4 1.6 0.4\n"
+                                 "GATE   oai22   4  O=!((a+b)*(c+d)); PIN * INV 1 999 2.0 0.4 2.0 0.4\n"
+                                 "GATE   ha      5  O=!(a*b);         PIN * INV 1 999 1.2 0.4 1.2 0.4\n" /* NOTE(review): this entry and the next share the name "ha" — presumably the two outputs of one multi-output cell; confirm */
+                                 "GATE   ha      5  O=!a*!b+a*b;      PIN * INV 1 999 2.1 0.4 2.1 0.4\n"
+                                 "GATE   buf     2  O=a;              PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
+                                 "GATE   zero    0  O=CONST0;\n"
+                                 "GATE   one     0  O=CONST1;";
+
+int main() /* emap experiment driver: maps every EPFL benchmark with emap_block and tabulates the results */
+{
+  using namespace experiments;
+  using namespace mockturtle;
+
+  experiment<std::string, uint32_t, double, uint32_t, double, uint32_t, float, bool> exp(
+      "emap", "benchmark", "size", "area_after", "depth", "delay_after", "multioutput", "runtime", "cec" );
+
+  fmt::print( "[i] processing technology library\n" );
+
+  /* parse the embedded genlib text (mcnc_library) into gates; the program exits with 1 if parsing fails */
+  std::vector<gate> gates;
+  std::stringstream in( mcnc_library );
+
+  if ( lorina::read_genlib( in, genlib_reader( gates ) ) != lorina::return_code::success )
+  {
+    return 1;
+  }
+
+  tech_library_params tps;
+  tps.verbose = true;
+  tech_library tech_lib( gates, tps ); /* built once and shared by all benchmarks in the loop below */
+
+  for ( auto const& benchmark : epfl_benchmarks() )
+  {
+    fmt::print( "[i] processing {}\n", benchmark );
+
+    aig_network aig;
+    if ( lorina::read_aiger( benchmark_path( benchmark ), aiger_reader( aig ) ) != lorina::return_code::success )
+    {
+      continue; /* benchmarks that cannot be read are skipped and get no row in the table */
+    }
+
+    const uint32_t size_before = aig.num_gates(); /* gate count of the input AIG ("size" column) */
+    const uint32_t depth_before = depth_view( aig ).depth(); /* depth of the input AIG ("depth" column) */
+
+    emap_params ps;
+    ps.map_multioutput = true; /* request mapping with multi-output cells */
+    emap_stats st;
+    cell_view<block_network> res = emap_block( aig, tech_lib, ps, &st );
+
+    /* decompose multi-output cells into a k-LUT network for verification purposes */
+    klut_network klut = decompose_multioutput<block_network, klut_network>( res );
+    const auto cec = benchmark == "hyp" ? true : abc_cec( klut, benchmark ); /* "hyp" is recorded as passing without calling abc_cec — NOTE(review): presumably skipped for runtime reasons; confirm */
+
+    exp( benchmark, size_before, res.compute_area(), depth_before, res.compute_worst_delay(), st.multioutput_gates, to_seconds( st.time_total ), cec ); /* one row, in the column order declared for exp above */
+  }
+
+  exp.save();
+  exp.table();
+
+  return 0;
+}
\ No newline at end of file
diff --git a/include/mockturtle/algorithms/experimental/decompose_multioutput.hpp b/include/mockturtle/algorithms/experimental/decompose_multioutput.hpp
new file mode 100644
index 000000000..e1ff6783a
--- /dev/null
+++ b/include/mockturtle/algorithms/experimental/decompose_multioutput.hpp
@@ -0,0 +1,398 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2023  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file decompose_multioutput.hpp
+  \brief Decomposes the multi-output gates into single output
+
+  \author Alessandro Tempia Calvino
+*/
+
+#pragma once
+
+#include <iostream>
+#include <type_traits>
+#include <vector>
+
+#include <kitty/operations.hpp>
+
+#include "../../traits.hpp"
+#include "../../utils/node_map.hpp"
+#include "../../views/topo_view.hpp"
+#include "../cleanup.hpp"
+
+namespace mockturtle
+{
+
+struct decompose_multioutput_params
+{
+  bool set_multioutput_as_dont_touch{ false };
+};
+
+namespace detail
+{
+
+template<typename NtkSrc, typename NtkDest, typename LeavesIterator>
+void decompose_multioutput_impl( NtkSrc const& ntk, NtkDest& dest, LeavesIterator begin, LeavesIterator end, std::unordered_map<uint64_t, signal<NtkDest>>& old_to_new, decompose_multioutput_params const& ps )
+{
+  /* constants (the true constant is mapped only if it is a distinct node) */
+  old_to_new[ntk.get_constant( false )] = dest.get_constant( false );
+  if ( ntk.get_node( ntk.get_constant( true ) ) != ntk.get_node( ntk.get_constant( false ) ) )
+  {
+    old_to_new[ntk.get_constant( true )] = dest.get_constant( true );
+  }
+
+  /* map PIs (and ROs, if present) to the given leaves, in the same order */
+  auto it = begin;
+  ntk.foreach_pi( [&]( auto node ) {
+    old_to_new[ntk.make_signal( node )] = *it++;
+  } );
+  if constexpr ( has_foreach_ro_v<NtkSrc> )
+  {
+    ntk.foreach_ro( [&]( auto node ) {
+      old_to_new[ntk.make_signal( node )] = *it++;
+    } );
+  }
+  assert( it == end );
+  (void)end;
+
+  /* foreach node in topological order */
+  topo_view topo{ ntk };
+  topo.foreach_node( [&]( auto node ) {
+    if ( ntk.is_constant( node ) || ntk.is_ci( node ) )
+      return;
+
+    /* collect children (translated fanins carrying their original complementation) */
+    std::vector<signal<NtkDest>> children;
+    ntk.foreach_fanin( node, [&]( auto child, auto ) {
+      const auto child_no_complement = child ^ ntk.is_complemented( child );
+      const auto f = old_to_new[child_no_complement];
+
+      assert( dest.get_node( f ) != dest.get_node( dest.get_constant( false ) ) );
+      assert( dest.get_node( f ) != dest.get_node( dest.get_constant( true ) ) );
+
+      children.push_back( f ^ ntk.is_complemented( child ) );
+    } );
+
+    /* clone node: a multi-output node becomes one gate per output pin */
+    if ( ntk.is_multioutput( node ) )
+    {
+      for ( uint32_t i = 0; i < ntk.num_outputs( node ); ++i )
+      {
+        auto f = ntk.make_signal( node, i );
+        do
+        {
+          if constexpr ( has_is_and_v<NtkSrc> )
+          {
+            static_assert( has_create_and_v<NtkDest>, "NtkDest cannot create AND gates" );
+            if ( ntk.is_and( f ) )
+            {
+              old_to_new[f] = dest.create_and( children[0], children[1] );
+              break;
+            }
+          }
+          if constexpr ( has_is_or_v<NtkSrc> )
+          {
+            static_assert( has_create_or_v<NtkDest>, "NtkDest cannot create OR gates" );
+            if ( ntk.is_or( f ) )
+            {
+              old_to_new[f] = dest.create_or( children[0], children[1] );
+              break;
+            }
+          }
+          if constexpr ( has_is_xor_v<NtkSrc> )
+          {
+            static_assert( has_create_xor_v<NtkDest>, "NtkDest cannot create XOR gates" );
+            if ( ntk.is_xor( f ) )
+            {
+              old_to_new[f] = dest.create_xor( children[0], children[1] );
+              break;
+            }
+          }
+          if constexpr ( has_is_maj_v<NtkSrc> )
+          {
+            static_assert( has_create_maj_v<NtkDest>, "NtkDest cannot create MAJ gates" );
+            if ( ntk.is_maj( f ) )
+            {
+              old_to_new[f] = dest.create_maj( children[0], children[1], children[2] );
+              break;
+            }
+          }
+          if constexpr ( has_is_ite_v<NtkSrc> )
+          {
+            static_assert( has_create_ite_v<NtkDest>, "NtkDest cannot create ITE gates" );
+            if ( ntk.is_ite( f ) )
+            {
+              old_to_new[f] = dest.create_ite( children[0], children[1], children[2] );
+              break;
+            }
+          }
+          if constexpr ( has_is_xor3_v<NtkSrc> )
+          {
+            static_assert( has_create_xor3_v<NtkDest>, "NtkDest cannot create XOR3 gates" );
+            if ( ntk.is_xor3( f ) )
+            {
+              old_to_new[f] = dest.create_xor3( children[0], children[1], children[2] );
+              break;
+            }
+          }
+          if constexpr ( has_is_function_v<NtkSrc> && has_create_node_v<NtkDest> )
+          {
+            old_to_new[f] = dest.create_node( children, ntk.node_function_pin( node, i ) );
+            break;
+          }
+          std::cerr << "[e] something went wrong, could not copy node " << ntk.node_to_index( node ) << "\n";
+        } while ( false );
+
+        /* set dont touch */
+        if constexpr ( has_select_dont_touch_v<NtkDest> )
+        {
+          if ( ps.set_multioutput_as_dont_touch )
+            dest.select_dont_touch( dest.get_node( old_to_new[f] ) );
+        }
+
+        /* copy name */
+        if constexpr ( has_has_name_v<NtkSrc> && has_get_name_v<NtkSrc> && has_set_name_v<NtkDest> )
+        {
+          if ( ntk.has_name( f ) )
+          {
+            dest.set_name( old_to_new[f], ntk.get_name( f ) );
+          }
+          if ( ntk.has_name( !f ) )
+          {
+            dest.set_name( !old_to_new[f], ntk.get_name( !f ) );
+          }
+        }
+      }
+    }
+    else
+    {
+      auto f = ntk.make_signal( node );
+      if constexpr ( std::is_same_v<NtkSrc, NtkDest> )
+      {
+        old_to_new[f] = dest.clone_node( ntk, node, children );
+      }
+      else
+      {
+        do
+        {
+          if constexpr ( has_is_and_v<NtkSrc> )
+          {
+            static_assert( has_create_and_v<NtkDest>, "NtkDest cannot create AND gates" );
+            if ( ntk.is_and( node ) )
+            {
+              old_to_new[f] = dest.create_and( children[0], children[1] );
+              break;
+            }
+          }
+          if constexpr ( has_is_or_v<NtkSrc> )
+          {
+            static_assert( has_create_or_v<NtkDest>, "NtkDest cannot create OR gates" );
+            if ( ntk.is_or( node ) )
+            {
+              old_to_new[f] = dest.create_or( children[0], children[1] );
+              break;
+            }
+          }
+          if constexpr ( has_is_xor_v<NtkSrc> )
+          {
+            static_assert( has_create_xor_v<NtkDest>, "NtkDest cannot create XOR gates" );
+            if ( ntk.is_xor( node ) )
+            {
+              old_to_new[f] = dest.create_xor( children[0], children[1] );
+              break;
+            }
+          }
+          if constexpr ( has_is_maj_v<NtkSrc> )
+          {
+            static_assert( has_create_maj_v<NtkDest>, "NtkDest cannot create MAJ gates" );
+            if ( ntk.is_maj( node ) )
+            {
+              old_to_new[f] = dest.create_maj( children[0], children[1], children[2] );
+              break;
+            }
+          }
+          if constexpr ( has_is_ite_v<NtkSrc> )
+          {
+            static_assert( has_create_ite_v<NtkDest>, "NtkDest cannot create ITE gates" );
+            if ( ntk.is_ite( node ) )
+            {
+              old_to_new[f] = dest.create_ite( children[0], children[1], children[2] );
+              break;
+            }
+          }
+          if constexpr ( has_is_xor3_v<NtkSrc> )
+          {
+            static_assert( has_create_xor3_v<NtkDest>, "NtkDest cannot create XOR3 gates" );
+            if ( ntk.is_xor3( node ) )
+            {
+              old_to_new[f] = dest.create_xor3( children[0], children[1], children[2] );
+              break;
+            }
+          }
+          if constexpr ( has_is_nary_and_v<NtkSrc> )
+          {
+            static_assert( has_create_nary_and_v<NtkDest>, "NtkDest cannot create n-ary AND gates" );
+            if ( ntk.is_nary_and( node ) )
+            {
+              old_to_new[f] = dest.create_nary_and( children );
+              break;
+            }
+          }
+          if constexpr ( has_is_nary_or_v<NtkSrc> )
+          {
+            static_assert( has_create_nary_or_v<NtkDest>, "NtkDest cannot create n-ary OR gates" );
+            if ( ntk.is_nary_or( node ) )
+            {
+              old_to_new[f] = dest.create_nary_or( children );
+              break;
+            }
+          }
+          if constexpr ( has_is_nary_xor_v<NtkSrc> )
+          {
+            static_assert( has_create_nary_xor_v<NtkDest>, "NtkDest cannot create n-ary XOR gates" );
+            if ( ntk.is_nary_xor( node ) )
+            {
+              old_to_new[f] = dest.create_nary_xor( children );
+              break;
+            }
+          }
+          if constexpr ( has_is_function_v<NtkSrc> && has_create_node_v<NtkDest> )
+          {
+            old_to_new[f] = dest.create_node( children, ntk.node_function( node ) );
+            break;
+          }
+          std::cerr << "[e] something went wrong, could not copy node " << ntk.node_to_index( node ) << "\n";
+        } while ( false );
+
+        /* copy name */
+        if constexpr ( has_has_name_v<NtkSrc> && has_get_name_v<NtkSrc> && has_set_name_v<NtkDest> )
+        {
+          if ( ntk.has_name( f ) )
+          {
+            dest.set_name( old_to_new[f], ntk.get_name( f ) );
+          }
+          if ( ntk.has_name( !f ) )
+          {
+            dest.set_name( !old_to_new[f], ntk.get_name( !f ) );
+          }
+        }
+      }
+    }
+  } );
+
+  /* POs */
+  ntk.foreach_po( [&]( auto const& po ) {
+    const auto po_no_complement = po ^ ntk.is_complemented( po );
+    auto const f = old_to_new[po_no_complement];
+    dest.create_po( f ^ ntk.is_complemented( po ) );
+  } );
+
+  /* RIs */
+  if constexpr ( has_foreach_ri_v<NtkSrc> && has_create_ri_v<NtkDest> )
+  {
+    ntk.foreach_ri( [&]( auto const& f ) {
+      dest.create_ri( old_to_new[f ^ ntk.is_complemented( f )] ^ ntk.is_complemented( f ) );
+    } );
+  }
+
+  /* CO names */
+  if constexpr ( has_has_output_name_v<NtkSrc> && has_get_output_name_v<NtkSrc> && has_set_output_name_v<NtkDest> )
+  {
+    ntk.foreach_co( [&]( auto co, auto index ) {
+      (void)co;
+      if ( ntk.has_output_name( index ) )
+      {
+        dest.set_output_name( index, ntk.get_output_name( index ) );
+      }
+    } );
+  }
+}
+
+} // namespace detail
+
+/*! \brief Decomposes the multi-output gates into single output.
+ *
+ * This method reconstructs a network decomposing the multi-output gates into
+ * single output gates. Moreover, it omits all dangling nodes.
+ *
+   \verbatim embed:rst
+
+   .. note::
+
+      This method returns the cleaned up network as a return value.  It does
+      *not* modify the input network.
+   \endverbatim
+ *
+ * **Required network functions:**
+ * - `get_node`
+ * - `node_to_index`
+ * - `get_constant`
+ * - `create_pi`
+ * - `create_po`
+ * - `create_not`
+ * - `is_complemented`
+ * - `foreach_node`
+ * - `foreach_pi`
+ * - `foreach_po`
+ * - `clone_node`
+ * - `is_pi`
+ * - `is_constant`
+ * - `is_multioutput`, `node_function_pin`, `num_outputs`
+ */
+template<class NtkSrc, class NtkDest = NtkSrc>
+[[nodiscard]] NtkDest decompose_multioutput( NtkSrc const& ntk, decompose_multioutput_params const& ps = {} )
+{
+  static_assert( is_network_type_v<NtkSrc>, "NtkSrc is not a network type" );
+  static_assert( is_network_type_v<NtkDest>, "NtkDest is not a network type" );
+  static_assert( has_get_node_v<NtkSrc>, "NtkSrc does not implement the get_node method" );
+  static_assert( has_node_to_index_v<NtkSrc>, "NtkSrc does not implement the node_to_index method" );
+  static_assert( has_get_constant_v<NtkSrc>, "NtkSrc does not implement the get_constant method" );
+  static_assert( has_foreach_node_v<NtkSrc>, "NtkSrc does not implement the foreach_node method" );
+  static_assert( has_foreach_pi_v<NtkSrc>, "NtkSrc does not implement the foreach_pi method" );
+  static_assert( has_foreach_po_v<NtkSrc>, "NtkSrc does not implement the foreach_po method" );
+  static_assert( has_is_pi_v<NtkSrc>, "NtkSrc does not implement the is_pi method" );
+  static_assert( has_is_constant_v<NtkSrc>, "NtkSrc does not implement the is_constant method" );
+  static_assert( has_clone_node_v<NtkDest>, "NtkDest does not implement the clone_node method" );
+  static_assert( has_create_pi_v<NtkDest>, "NtkDest does not implement the create_pi method" );
+  static_assert( has_create_po_v<NtkDest>, "NtkDest does not implement the create_po method" );
+  static_assert( has_create_not_v<NtkDest>, "NtkDest does not implement the create_not method" );
+  static_assert( has_is_complemented_v<NtkSrc>, "NtkSrc does not implement the is_complemented method" );
+  static_assert( has_is_multioutput_v<NtkSrc>, "NtkSrc does not implement the is_multioutput method" );
+  static_assert( has_node_function_pin_v<NtkSrc>, "NtkSrc does not implement the node_function_pin method" );
+  static_assert( has_num_outputs_v<NtkSrc>, "NtkSrc does not implement the num_outputs method" );
+
+  NtkDest dest;
+
+  std::vector<signal<NtkDest>> cis;
+  detail::clone_inputs( ntk, dest, cis, false );
+
+  std::unordered_map<uint64_t, signal<NtkDest>> old_to_new;
+  detail::decompose_multioutput_impl( ntk, dest, cis.begin(), cis.end(), old_to_new, ps );
+
+  return dest;
+}
+
+} // namespace mockturtle
diff --git a/include/mockturtle/algorithms/experimental/emap.hpp b/include/mockturtle/algorithms/experimental/emap.hpp
new file mode 100644
index 000000000..6b51cd023
--- /dev/null
+++ b/include/mockturtle/algorithms/experimental/emap.hpp
@@ -0,0 +1,5398 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2023  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file emap.hpp
+  \brief An extended technology mapper
+
+  \author Alessandro Tempia Calvino
+*/
+
+#pragma once
+
+#include <chrono>
+#include <cstdint>
+#include <limits>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include <kitty/constructors.hpp>
+#include <kitty/dynamic_truth_table.hpp>
+#include <kitty/hash.hpp>
+#include <kitty/static_truth_table.hpp>
+
+#include <fmt/format.h>
+#include <parallel_hashmap/phmap.h>
+
+#include "../../networks/block.hpp"
+#include "../../networks/klut.hpp"
+#include "../../utils/cuts.hpp"
+#include "../../utils/node_map.hpp"
+#include "../../utils/stopwatch.hpp"
+#include "../../utils/tech_library.hpp"
+#include "../../views/binding_view.hpp"
+#include "../../views/cell_view.hpp"
+#include "../../views/choice_view.hpp"
+#include "../../views/topo_view.hpp"
+#include "../cleanup.hpp"
+#include "../cut_enumeration.hpp"
+#include "../detail/mffc_utils.hpp"
+#include "../detail/switching_activity.hpp"
+
+namespace mockturtle
+{
+
+/*! \brief Parameters for emap.
+ *
+ * The data structure `emap_params` holds configurable parameters
+ * with default arguments for `emap`.
+ */
+struct emap_params
+{
+  emap_params()
+  {
+    cut_enumeration_ps.cut_limit = 16;
+    cut_enumeration_ps.minimize_truth_table = true;
+  }
+
+  /*! \brief Parameters for cut enumeration
+   *
+   * The default cut limit is 16.
+   * NOTE(review): the maximum cut limit was documented as 15, which is below this default; confirm.
+   * By default, truth table minimization
+   * is performed.
+   */
+  cut_enumeration_params cut_enumeration_ps{};
+
+  /*! \brief Do area-oriented mapping. */
+  bool area_oriented_mapping{ false };
+
+  /*! \brief Maps using multi-output gates */
+  bool map_multioutput{ false };
+
+  /*! \brief Required time for delay optimization. */
+  double required_time{ 0.0f };
+
+  /*! \brief Required time relaxation ratio. */
+  double relax_required{ 0.0f };
+
+  /*! \brief Number of rounds for area flow optimization. */
+  uint32_t area_flow_rounds{ 2u };
+
+  /*! \brief Number of rounds for exact area optimization. */
+  uint32_t ela_rounds{ 2u };
+
+  /*! \brief Number of rounds for exact switching power optimization. */
+  uint32_t eswp_rounds{ 0u };
+
+  /*! \brief Number of patterns for switching activity computation. */
+  uint32_t switching_activity_patterns{ 2048u };
+
+  /*! \brief Fast area recovery */
+  bool use_fast_area_recovery{ true };
+
+  /*! \brief Remove the cuts that are contained in others */
+  bool remove_dominated_cuts{ false };
+
+  /*! \brief Remove overlapping multi-output cuts */
+  bool remove_overlapping_multicuts{ false };
+
+  /*! \brief Allow node duplication (enabled by default) */
+  bool allow_node_duplication{ true };
+
+  /*! \brief Be verbose. */
+  bool verbose{ false };
+};
+
+/*! \brief Statistics for emap.
+ *
+ * The data structure `emap_stats` provides data collected by running
+ * `emap`.
+ */
+struct emap_stats
+{
+  /*! \brief Area result. */
+  double area{ 0 };
+  /*! \brief Worst delay result. */
+  double delay{ 0 };
+  /*! \brief Power result. */
+  double power{ 0 };
+  /*! \brief Inverter count (NOTE(review): presumably of the mapped result; confirm). */
+  uint32_t inverters{ 0 };
+
+  /*! \brief Mapped multi-output gates. */
+  uint32_t multioutput_gates{ 0 };
+
+  /*! \brief Runtime for multi-output matching. */
+  stopwatch<>::duration time_multioutput{ 0 };
+  /*! \brief Total runtime. */
+  stopwatch<>::duration time_total{ 0 };
+
+  /*! \brief Cut enumeration stats. */
+  cut_enumeration_stats cut_enumeration_st{};
+
+  /*! \brief Delay and area stats for each round. */
+  std::vector<std::string> round_stats{};
+
+  /*! \brief Mapping error. */
+  bool mapping_error{ false };
+
+  void report() const
+  {
+    for ( auto const& stat : round_stats )
+    {
+      std::cout << stat;
+    }
+    std::cout << fmt::format( "[i] Area = {:>5.2f}; Delay = {:>5.2f};", area, delay );
+    if ( power != 0 )
+      std::cout << fmt::format( " Power = {:>5.2f};\n", power );
+    else
+      std::cout << "\n";
+    if ( multioutput_gates )
+    {
+      std::cout << fmt::format( "[i] Multi-output gates   = {:>5}\n", multioutput_gates );
+      std::cout << fmt::format( "[i] Multi-output runtime = {:>5.2f} secs\n", to_seconds( time_multioutput ) );
+    }
+    std::cout << fmt::format( "[i] Total runtime        = {:>5.2f} secs\n", to_seconds( time_total ) );
+  }
+};
+
+namespace detail
+{
+
+#pragma region cut set
+template<unsigned NInputs>
+struct cut_enumeration_emap_cut
+{
+  /* stats */
+  double delay{ 0 };
+  double flow{ 0 };
+  bool ignore{ false };
+
+  /* function */
+  kitty::static_truth_table<6> function;
+
+  /* list of supergates matching the cut for positive and negative output phases */
+  std::array<std::vector<supergate<NInputs>> const*, 2> supergates = { nullptr, nullptr };
+  /* input negations, 0: pos, 1: neg */
+  std::array<uint16_t, 2> negations{ 0, 0 };
+};
+
+struct cut_enumeration_emap_multi_cut
+{
+  /* stats */
+  uint64_t id{ 0 };
+};
+
+enum class emap_cut_sort_type
+{
+  DELAY = 0,
+  DELAY2 = 1,
+  AREA = 2,
+  AREA2 = 3,
+  NONE = 4
+};
+
+template<typename CutType, uint32_t MaxCuts>
+class emap_cut_set
+{
+public:
+  /*! \brief Standard constructor.
+   */
+  emap_cut_set()
+  {
+    clear();
+  }
+
+  /*! \brief Assignment operator.
+   */
+  emap_cut_set& operator=( emap_cut_set const& other )
+  {
+    if ( this != &other )
+    {
+      _pcend = _pend = _pcuts.begin();
+      _set_limit = other._set_limit;
+
+      auto it = other.begin();
+      while ( it != other.end() )
+      {
+        **_pend++ = **it++;
+        ++_pcend;
+      }
+    }
+
+    return *this;
+  }
+
+  /*! \brief Clears a cut set.
+   */
+  void clear()
+  {
+    _pcend = _pend = _pcuts.begin();
+    auto pit = _pcuts.begin();
+    for ( auto& c : _cuts )
+    {
+      *pit++ = &c;
+    }
+  }
+
+  /*! \brief Sets the cut limit.
+   */
+  void set_cut_limit( uint32_t limit )
+  {
+    _set_limit = std::min( MaxCuts, limit );
+  }
+
+  /*! \brief Adds a cut to the end of the set.
+   *
+   * This function should only be called to create a set of cuts which is known
+   * to be sorted and irredundant (i.e., no cut in the set dominates another
+   * cut).
+   *
+   * \param begin Begin iterator to leaf indexes
+   * \param end End iterator (exclusive) to leaf indexes
+   * \return Reference to the added cut
+   */
+  template<typename Iterator>
+  CutType& add_cut( Iterator begin, Iterator end )
+  {
+    assert( _pend != _pcuts.end() );
+
+    auto& cut = **_pend++;
+    cut.set_leaves( begin, end );
+
+    ++_pcend;
+    return cut;
+  }
+
+  /*! \brief Appends a cut to the end of the set.
+   *
+   * This function should only be called to create a set of cuts which is known
+   * to be sorted and irredundant (i.e., no cut in the set dominates another
+   * cut).
+   *
+   * \param cut Cut to insert
+   */
+  void append_cut( CutType const& cut )
+  {
+    assert( _pend != _pcuts.end() );
+
+    **_pend++ = cut;
+    ++_pcend;
+  }
+
+  /*! \brief Checks whether cut is dominated by any cut in the set.
+   *
+   * \param cut Cut outside of the set
+   */
+  bool is_dominated( CutType const& cut ) const
+  {
+    return std::find_if( _pcuts.begin(), _pcend, [&cut]( auto const* other ) { return other->dominates( cut ); } ) != _pcend;
+  }
+
+  static bool sort_delay( CutType const& c1, CutType const& c2 )
+  {
+    constexpr auto eps{ 0.005f };
+    if ( !c1->ignore && c2->ignore )
+      return true;
+    if ( c1->ignore && !c2->ignore )
+      return false;
+    if ( c1->delay < c2->delay - eps )
+      return true;
+    if ( c1->delay > c2->delay + eps )
+      return false;
+    if ( c1->flow < c2->flow - eps )
+      return true;
+    if ( c1->flow > c2->flow + eps )
+      return false;
+    return c1.size() < c2.size();
+  }
+
+  static bool sort_delay2( CutType const& c1, CutType const& c2 )
+  {
+    constexpr auto eps{ 0.005f };
+    if ( !c1->ignore && c2->ignore )
+      return true;
+    if ( c1->ignore && !c2->ignore )
+      return false;
+    if ( c1.size() < c2.size() )
+      return true;
+    if ( c1.size() > c2.size() )
+      return false;
+    if ( c1->delay < c2->delay - eps )
+      return true;
+    if ( c1->delay > c2->delay + eps )
+      return false;
+    return c1->flow < c2->flow - eps;
+  }
+
+  static bool sort_area( CutType const& c1, CutType const& c2 )
+  {
+    constexpr auto eps{ 0.005f };
+    if ( !c1->ignore && c2->ignore )
+      return true;
+    if ( c1->ignore && !c2->ignore )
+      return false;
+    if ( c1->flow < c2->flow - eps )
+      return true;
+    if ( c1->flow > c2->flow + eps )
+      return false;
+    if ( c1.size() < c2.size() )
+      return true;
+    if ( c1.size() > c2.size() )
+      return false;
+    return c1->delay < c2->delay - eps;
+  }
+
+  static bool sort_area2( CutType const& c1, CutType const& c2 )
+  {
+    constexpr auto eps{ 0.005f };
+    if ( !c1->ignore && c2->ignore )
+      return true;
+    if ( c1->ignore && !c2->ignore )
+      return false;
+    if ( c1->flow < c2->flow - eps )
+      return true;
+    if ( c1->flow > c2->flow + eps )
+      return false;
+    if ( c1->delay < c2->delay - eps )
+      return true;
+    if ( c1->delay > c2->delay + eps )
+      return false;
+    return c1.size() < c2.size();
+  }
+
+  /*! \brief Compare two cuts using sorting functions.
+   *
+   * This method compares two cuts using a sorting function.
+   *
+   * \param cut1 first cut.
+   * \param cut2 second cut.
+   * \param sort sorting function.
+   */
+  static bool compare( CutType const& cut1, CutType const& cut2, emap_cut_sort_type sort = emap_cut_sort_type::NONE )
+  {
+    if ( sort == emap_cut_sort_type::DELAY )
+    {
+      return sort_delay( cut1, cut2 );
+    }
+    else if ( sort == emap_cut_sort_type::DELAY2 )
+    {
+      return sort_delay2( cut1, cut2 );
+    }
+    else if ( sort == emap_cut_sort_type::AREA )
+    {
+      return sort_area( cut1, cut2 );
+    }
+    else if ( sort == emap_cut_sort_type::AREA2 )
+    {
+      return sort_area2( cut1, cut2 );
+    }
+    else
+    {
+      return false;
+    }
+  }
+
+  /*! \brief Inserts a cut into a set without checking dominance.
+   *
+   * This method will insert a cut into a set and maintain an order.  This
+   * method doesn't remove the cuts that are dominated by `cut`.
+   *
+   * If `cut` is dominated by any of the cuts in the set, it will still be
+   * inserted.  The caller is responsible to check whether `cut` is dominated
+   * before inserting it into the set.
+   *
+   * \param cut Cut to insert.
+   * \param sort Cut prioritization function.
+   */
+  void simple_insert( CutType const& cut, emap_cut_sort_type sort = emap_cut_sort_type::NONE )
+  {
+    /* insert cut in a sorted way */
+    typename std::array<CutType*, MaxCuts>::iterator ipos = _pcuts.begin();
+
+    bool limit_reached = std::distance( _pcuts.begin(), _pend ) >= _set_limit;
+
+    /* set is full: only an AREA-sorted cut better than the last cut within set_limit may enter */
+    if ( limit_reached )
+    {
+      if ( sort == emap_cut_sort_type::AREA && !sort_area( cut, **( ipos + _set_limit - 1 ) ) )
+      {
+        return;
+      }
+      else if ( sort != emap_cut_sort_type::AREA )
+      {
+        return;
+      }
+    }
+
+    if ( sort == emap_cut_sort_type::NONE )
+    {
+      ipos = _pend;
+    }
+    else /* every other sort type: the position is found using sort_area */
+    {
+      ipos = std::upper_bound( _pcuts.begin(), _pend, &cut, []( auto a, auto b ) { return sort_area( *a, *b ); } );
+    }
+
+    /* too many cuts, we need to remove one */
+    if ( _pend == _pcuts.end() || limit_reached )
+    {
+      /* cut to be inserted is worse than all the others, return */
+      if ( ipos == _pend )
+      {
+        return;
+      }
+      else
+      {
+        /* remove last cut */
+        --_pend;
+        --_pcend;
+      }
+    }
+
+    /* copy cut into the slot at the end of the set */
+    auto& icut = *_pend;
+    icut->set_leaves( cut.begin(), cut.end() );
+    icut->data() = cut.data();
+
+    if ( ipos != _pend )
+    {
+      auto it = _pend;
+      while ( it > ipos )
+      {
+        std::swap( *it, *( it - 1 ) );
+        --it;
+      }
+    }
+
+    /* update iterators */
+    _pcend++;
+    _pend++;
+  }
+
+  /*! \brief Inserts a cut into a set.
+   *
+   * This method will insert a cut into a set and maintain an order.  Before the
+   * cut is inserted into the correct position, it will remove all cuts that are
+   * dominated by `cut`. Variable `skip0` tells the method to skip the dominance
+   * check on cut zero.
+   *
+   * If `cut` is dominated by any of the cuts in the set, it will still be
+   * inserted.  The caller is responsible to check whether `cut` is dominated
+   * before inserting it into the set.
+   *
+   * \param cut Cut to insert.
+   * \param skip0 Skip dominance check on cut zero.
+   * \param sort Cut prioritization function.
+   */
+  void insert( CutType const& cut, bool skip0 = false, emap_cut_sort_type sort = emap_cut_sort_type::NONE )
+  {
+    auto begin = _pcuts.begin();
+
+    if ( skip0 && _pend != _pcuts.begin() )
+      ++begin;
+
+    /* remove elements that are dominated by new cut */
+    _pcend = _pend = std::stable_partition( begin, _pend, [&cut]( auto const* other ) { return !cut.dominates( *other ); } );
+
+    /* insert cut in a sorted way */
+    simple_insert( cut, sort );
+  }
+
+  /*! \brief Replaces a cut of the set.
+   *
+   * This method replaces the cut at position `index` in the set by `cut`
+   * and maintains the cuts order. The function does not check whether
+   * index is in the valid range.
+   *
+   * \param index Index of the cut to replace.
+   * \param cut Cut to insert.
+   */
+  void replace( uint32_t index, CutType const& cut )
+  {
+    *_pcuts[index] = cut;
+  }
+
+  /*! \brief Begin iterator (constant).
+   *
+   * The iterator will point to a cut pointer.
+   */
+  auto begin() const { return _pcuts.begin(); }
+
+  /*! \brief End iterator (constant). */
+  auto end() const { return _pcend; }
+
+  /*! \brief Begin iterator (mutable).
+   *
+   * The iterator will point to a cut pointer.
+   */
+  auto begin() { return _pcuts.begin(); }
+
+  /*! \brief End iterator (mutable). */
+  auto end() { return _pend; }
+
+  /*! \brief Number of cuts in the set. */
+  auto size() const { return _pcend - _pcuts.begin(); }
+
+  /*! \brief Returns reference to cut at index.
+   *
+   * This function does not return the cut pointer but dereferences it and
+   * returns a reference.  The function does not check whether index is in the
+   * valid range.
+   *
+   * \param index Index
+   */
+  auto const& operator[]( uint32_t index ) const { return *_pcuts[index]; }
+
+  /*! \brief Returns the best cut, i.e., the first cut.
+   */
+  auto const& best() const { return *_pcuts[0]; }
+
+  /*! \brief Updates the best cut.
+   *
+   * This method will set the cut at index `index` to be the best cut.  All
+   * cuts before `index` will be moved one position higher.
+   *
+   * \param index Index of new best cut
+   */
+  void update_best( uint32_t index )
+  {
+    auto* best = _pcuts[index];
+    for ( auto i = index; i > 0; --i )
+    {
+      _pcuts[i] = _pcuts[i - 1];
+    }
+    _pcuts[0] = best;
+  }
+
+  /*! \brief Resize the cut set, if it is too large.
+   *
+   * This method will resize the cut set to `size` only if the cut set has more
+   * than `size` elements.  Otherwise, the size will remain the same.
+   */
+  void limit( uint32_t size )
+  {
+    if ( std::distance( _pcuts.begin(), _pend ) > static_cast<long>( size ) )
+    {
+      _pcend = _pend = _pcuts.begin() + size;
+    }
+  }
+
+  /*! \brief Prints a cut set. */
+  friend std::ostream& operator<<( std::ostream& os, emap_cut_set const& set )
+  {
+    for ( auto const& c : set )
+    {
+      os << *c << "\n";
+    }
+    return os;
+  }
+
+  /*! \brief Returns whether the set already holds a cut with the same signature as `cut`. */
+  bool is_contained( CutType const& cut )
+  {
+    typename std::array<CutType*, MaxCuts>::iterator ipos = _pcuts.begin();
+
+    while ( ipos != _pend )
+    {
+      if ( ( *ipos )->signature() == cut.signature() )
+        return true;
+      ++ipos;
+    }
+
+    return false;
+  }
+
+private:
+  std::array<CutType, MaxCuts> _cuts; /* cut objects; presumably the storage the `_pcuts` pointers refer to (confirm against the constructor) */
+  std::array<CutType*, MaxCuts> _pcuts; /* cut pointers in set order; `operator[]` and `best` dereference these */
+  typename std::array<CutType*, MaxCuts>::const_iterator _pcend{ _pcuts.begin() }; /* const end marker, used by `size` */
+  typename std::array<CutType*, MaxCuts>::iterator _pend{ _pcuts.begin() }; /* mutable end marker, used by `end`, `limit`, and `is_contained` */
+  uint32_t _set_limit{ MaxCuts }; /* defaults to the capacity; presumably changed through `set_cut_limit` (not visible here) */
+};
+#pragma endregion
+
+#pragma region Hashing
+template<uint32_t max_multioutput_cut_size>
+struct emap_triple_hash
+{
+  /* Hashes a leaf array: the seed is hash_block of the first entry; hash_block of every further entry is folded in with hash_combine, in index order. */
+  inline uint64_t operator()( const std::array<uint32_t, max_multioutput_cut_size>& p ) const
+  {
+    auto it = p.begin();
+    uint64_t seed = hash_block( *it );
+    while ( ++it != p.end() )
+    {
+      hash_combine( seed, hash_block( *it ) );
+    }
+    return seed;
+  }
+};
+#pragma endregion
+
+template<unsigned NInputs>
+struct node_match_emap /* per-node mapping record; in the two-element arrays index 0 is the positive output phase and index 1 the negative one */
+{
+  /* best gate match for positive and negative output phases; nullptr when a phase has no match */
+  supergate<NInputs> const* best_supergate[2];
+  /* fanin pin phases for both output phases; bit i is the phase requested from the i-th leaf of the best cut */
+  uint16_t phase[2];
+  /* best cut index for both phases, used as an index into the node's cut set */
+  uint32_t best_cut[2];
+  /* node is mapped using only one phase; the other phase is then derived through an inverter */
+  bool same_match;
+  /* node is mapped to a multi-output gate */
+  bool multioutput_match[2];
+
+  /* arrival time at node output */
+  double arrival[2];
+  /* required time at node output */
+  double required[2];
+  /* area of the best matches */
+  float area[2];
+
+  /* number of references in the cover 0: pos, 1: neg, 2: pos+neg */
+  uint32_t map_refs[3];
+  /* references estimation, initialized by the mapping passes with the fanout size of the node */
+  float est_refs[3];
+  /* area flow */
+  float flows[2];
+};
+
+union multi_match_data /* one 64-bit word, readable as a whole through `data` or through the packed fields below */
+{
+  uint64_t data{ 0 };
+  struct /* anonymous struct: its bit-fields are accessed directly on the union */
+  {
+    uint64_t in_tfi : 1; /* 1-bit flag */
+    uint64_t cut_index : 31; /* 31-bit cut index */
+    uint64_t node_index : 32; /* 32-bit node index */
+  };
+};
+
+template<class Ntk, unsigned CutSize, unsigned NInputs, classification_type Configuration>
+class emap_impl
+{
+public:
+  static constexpr float epsilon = 0.0005; /* tolerance added to the required time in the arrival/required assertions */
+  static constexpr uint32_t max_cut_num = 32; /* capacity passed to emap_cut_set for every per-node cut set */
+  static constexpr uint32_t max_cut_leaves = 6; /* leaf capacity of cut_t; the constructors static_assert CutSize <= max_cut_leaves */
+  using cut_t = cut<max_cut_leaves, cut_enumeration_emap_cut<NInputs>>;
+  using cut_set_t = emap_cut_set<cut_t, max_cut_num>;
+  using cut_merge_t = typename std::array<cut_set_t*, Ntk::max_fanin_size + 1>; /* one slot per fanin plus one extra slot */
+  using fanin_cut_t = typename std::array<cut_t const*, Ntk::max_fanin_size>; /* one selected cut per fanin */
+  using support_t = typename std::array<uint8_t, CutSize>;
+  using TT = kitty::static_truth_table<6>;
+  using truth_compute_t = typename std::array<TT, CutSize>;
+  using node_match_t = std::vector<node_match_emap<NInputs>>; /* one record per node index */
+  using klut_map = std::unordered_map<uint32_t, std::array<signal<klut_network>, 2>>;
+  using block_map = std::unordered_map<uint32_t, std::array<signal<block_network>, 2>>;
+
+  static constexpr uint32_t max_multioutput_cut_size = 3; /* leaf count of the cuts enumerated for multi-output matching */
+  static constexpr uint32_t max_multioutput_output_size = 2; /* entries per multi-output match tuple */
+  using multi_cuts_t = fast_network_cuts<Ntk, max_multioutput_cut_size, true, cut_enumeration_emap_multi_cut>;
+  using multi_cut_t = typename multi_cuts_t::cut_t;
+  using multi_leaves_set_t = std::array<uint32_t, max_multioutput_cut_size>;
+  using multi_output_set_t = std::vector<multi_match_data>;
+  using multi_hash_t = phmap::flat_hash_map<multi_leaves_set_t, multi_output_set_t, emap_triple_hash<max_multioutput_cut_size>>; /* leaf array -> candidate outputs */
+  using multi_match_t = std::array<multi_match_data, max_multioutput_output_size>;
+  using multi_cut_set_t = std::vector<std::array<cut_t, max_multioutput_output_size>>;
+  using multi_single_matches_t = std::vector<multi_match_t>;
+  using multi_matches_t = std::vector<std::vector<multi_match_t>>;
+
+  using clock = typename std::chrono::steady_clock; /* clock used for time_begin and the reported run times */
+  using time_point = typename clock::time_point;
+
+public:
+  explicit emap_impl( Ntk const& ntk, tech_library<NInputs, Configuration> const& library, emap_params const& ps, emap_stats& st ) /* binds the inputs and sizes the per-node tables by ntk.size() */
+      : ntk( ntk ),
+        library( library ),
+        ps( ps ),
+        st( st ),
+        node_match( ntk.size() ),
+        node_tuple_match( ntk.size(), UINT32_MAX ), /* UINT32_MAX is the value the mapping passes test for before trying a multi-output match */
+        switch_activity( ps.eswp_rounds ? switching_activity( ntk, ps.switching_activity_patterns ) : std::vector<float>( 0 ) ), /* computed only when switching rounds are requested */
+        cuts( ntk.size() )
+  {
+    static_assert( CutSize <= max_cut_leaves, "CutSize is too large for the pre-allocated size\n" );
+
+    std::tie( lib_inv_area, lib_inv_delay, lib_inv_id ) = library.get_inverter_info(); /* cache inverter area, delay, and id */
+    std::tie( lib_buf_area, lib_buf_delay, lib_buf_id ) = library.get_buffer_info(); /* cache buffer area, delay, and id */
+    tmp_visited.reserve( 100 );
+  }
+
+  explicit emap_impl( Ntk const& ntk, tech_library<NInputs, Configuration> const& library, std::vector<float> const& switch_activity, emap_params const& ps, emap_stats& st ) /* same setup as the other constructor, but the switching activity is supplied by the caller */
+      : ntk( ntk ),
+        library( library ),
+        ps( ps ),
+        st( st ),
+        node_match( ntk.size() ),
+        node_tuple_match( ntk.size(), UINT32_MAX ), /* UINT32_MAX is the value the mapping passes test for before trying a multi-output match */
+        switch_activity( switch_activity ), /* taken from the argument instead of being computed */
+        cuts( ntk.size() )
+  {
+    static_assert( CutSize <= max_cut_leaves, "CutSize is too large for the pre-allocated size\n" );
+
+    std::tie( lib_inv_area, lib_inv_delay, lib_inv_id ) = library.get_inverter_info(); /* cache inverter area, delay, and id */
+    std::tie( lib_buf_area, lib_buf_delay, lib_buf_id ) = library.get_buffer_info(); /* cache buffer area, delay, and id */
+    tmp_visited.reserve( 100 );
+  }
+
+  binding_view<klut_network> run()
+  {
+    /* Complete mapping flow returning a `binding_view<klut_network>`.
+     * When a mapping step reports failure, `res` is returned as it is at
+     * that point and `st.time_total` is not updated. */
+    time_begin = clock::now();
+
+    auto [res, old2new] = initialize_map_network();
+
+    /* optional multi-output pre-processing */
+    if ( ps.map_multioutput )
+    {
+      compute_multioutput_match();
+    }
+
+    /* build and store the topological order */
+    init_topo_order();
+
+    /* cuts, matches, and initial mapping; the template argument selects
+     * the area-oriented variant */
+    bool const mapped = ps.area_oriented_mapping ? compute_mapping_match<true>()
+                                                 : compute_mapping_match<false>();
+    if ( !mapped )
+    {
+      return res;
+    }
+
+    /* area recovery */
+    if ( !improve_mapping() )
+    {
+      return res;
+    }
+
+    /* insert buffers for POs driven by PIs */
+    insert_buffers();
+
+    /* generate the output network */
+    finalize_cover( res, old2new );
+
+    /* record the total run time */
+    st.time_total = ( clock::now() - time_begin );
+
+    return res;
+  }
+
+  cell_view<block_network> run_block()
+  {
+    /* Complete mapping flow returning a `cell_view<block_network>`.
+     * When a mapping step reports failure, `res` is returned as it is at
+     * that point and `st.time_total` is not updated. */
+    time_begin = clock::now();
+
+    auto [res, old2new] = initialize_block_network();
+
+    /* optional multi-output pre-processing */
+    if ( ps.map_multioutput )
+    {
+      compute_multioutput_match();
+    }
+
+    /* build and store the topological order */
+    init_topo_order();
+
+    /* cuts, matches, and initial mapping; the template argument selects
+     * the area-oriented variant */
+    bool const mapped = ps.area_oriented_mapping ? compute_mapping_match<true>()
+                                                 : compute_mapping_match<false>();
+    if ( !mapped )
+    {
+      return res;
+    }
+
+    /* area recovery */
+    if ( !improve_mapping() )
+    {
+      return res;
+    }
+
+    /* insert buffers for POs driven by PIs */
+    insert_buffers();
+
+    /* generate the output network */
+    finalize_cover_block( res, old2new );
+
+    /* record the total run time */
+    st.time_total = ( clock::now() - time_begin );
+
+    return res;
+  }
+
+  binding_view<klut_network> run_node_map()
+  {
+    /* Mapping flow built on compute_mapping_match_node instead of
+     * compute_mapping_match.  When a step reports failure, `res` is
+     * returned as it is and `st.time_total` is not updated. */
+    time_begin = clock::now();
+
+    auto [res, old2new] = initialize_map_network();
+
+    /* TODO: multi-output support is currently not implemented */
+
+    /* build and store the topological order */
+    init_topo_order();
+
+    /* cuts, matches, and initial mapping; the template argument selects
+     * the area-oriented variant */
+    bool const mapped = ps.area_oriented_mapping ? compute_mapping_match_node<true>()
+                                                 : compute_mapping_match_node<false>();
+    if ( !mapped )
+    {
+      return res;
+    }
+
+    /* area recovery */
+    if ( !improve_mapping() )
+    {
+      return res;
+    }
+
+    /* insert buffers for POs driven by PIs */
+    insert_buffers();
+
+    /* generate the output network */
+    finalize_cover( res, old2new );
+
+    /* record the total run time */
+    st.time_total = ( clock::now() - time_begin );
+
+    return res;
+  }
+
+private:
+  bool improve_mapping()
+  {
+    /* Area recovery after the initial mapping: area-flow rounds, exact-area rounds, and exact
+     * switching-activity rounds, in this order.  Returns false as soon as one round fails. */
+    for ( uint32_t round = 0; round < ps.area_flow_rounds; ++round )
+    {
+      compute_required_time();
+      if ( !compute_mapping<true>() )
+      {
+        return false;
+      }
+    }
+
+    if ( ps.use_fast_area_recovery )
+    {
+      /* fast variant: one preparation step, then the reversed exact passes */
+      compute_required_time( true );
+      reindex_multioutput_data();
+
+      /* exact area; the argument is true only in the last round */
+      for ( uint32_t round = 0; round < ps.ela_rounds; ++round )
+      {
+        if ( !compute_mapping_exact_reversed<false>( round + 1 == ps.ela_rounds ) )
+        {
+          return false;
+        }
+      }
+
+      /* exact switching activity estimation */
+      for ( uint32_t round = 0; round < ps.eswp_rounds; ++round )
+      {
+        if ( !compute_mapping_exact_reversed<true>( true ) )
+        {
+          return false;
+        }
+      }
+
+      return true;
+    }
+
+    /* regular variant: required times are recomputed before every round */
+    for ( uint32_t round = 0; round < ps.ela_rounds; ++round )
+    {
+      compute_required_time();
+      if ( !compute_mapping_exact<false>( round + 1 == ps.ela_rounds ) )
+      {
+        return false;
+      }
+    }
+
+    /* exact switching activity estimation */
+    for ( uint32_t round = 0; round < ps.eswp_rounds; ++round )
+    {
+      compute_required_time();
+      if ( !compute_mapping_exact<true>( true ) )
+      {
+        return false;
+      }
+    }
+
+    /* cleaning not fully utilized multi-output gates */
+    if ( ps.map_multioutput )
+    {
+      remove_unused_multioutput();
+    }
+
+    return true;
+  }
+
+#pragma region Core
+  template<bool DO_AREA>
+  bool compute_mapping_match() /* first pass: enumerates cuts and matches both phases of every node in topological order, then returns the result of set_mapping_refs<false>() */
+  {
+    bool warning_box = false;
+
+    for ( auto const& n : topo_order )
+    {
+      auto const index = ntk.node_to_index( n );
+      auto& node_data = node_match[index];
+
+      /* est_refs is a float array: cast to float, as compute_mapping_match_node does */
+      node_data.est_refs[0] = node_data.est_refs[1] = node_data.est_refs[2] = static_cast<float>( ntk.fanout_size( n ) );
+      node_data.map_refs[0] = node_data.map_refs[1] = node_data.map_refs[2] = 0;
+      node_data.required[0] = node_data.required[1] = std::numeric_limits<float>::max();
+
+      if ( ntk.is_constant( n ) )
+      {
+        /* all terminals have flow 0.0 */
+        node_data.flows[0] = node_data.flows[1] = 0.0f;
+        node_data.arrival[0] = node_data.arrival[1] = 0.0f;
+        add_zero_cut( index );
+        match_constants( index );
+        continue;
+      }
+      else if ( ntk.is_pi( n ) )
+      {
+        /* all terminals have flow 0.0 */
+        node_data.flows[0] = node_data.flows[1] = 0.0f;
+        node_data.arrival[0] = 0.0f;
+        /* PIs have the negative phase implemented with an inverter */
+        node_data.arrival[1] = lib_inv_delay;
+        add_unit_cut( index );
+        continue;
+      }
+
+      /* don't touch box: initialized separately, no cuts are enumerated for it */
+      if constexpr ( has_is_dont_touch_v<Ntk> )
+      {
+        if ( ntk.is_dont_touch( n ) )
+        {
+          warning_box |= initialize_box( n );
+          continue;
+        }
+      }
+
+      /* compute cuts for node; networks with exactly two fanins per gate use the specialized merge */
+      if constexpr ( Ntk::min_fanin_size == 2 && Ntk::max_fanin_size == 2 )
+      {
+        merge_cuts2<DO_AREA>( n );
+      }
+      else
+      {
+        merge_cuts<DO_AREA>( n );
+      }
+
+      /* match positive phase */
+      match_phase<DO_AREA>( n, 0u );
+
+      /* match negative phase */
+      match_phase<DO_AREA>( n, 1u );
+
+      /* try to drop one phase */
+      match_drop_phase<DO_AREA, false>( n, 0 );
+
+      /* load and try a multi-output matches */
+      if ( ps.map_multioutput && node_tuple_match[index] != UINT32_MAX )
+      {
+        /* continue if matches do not fit in the cut data structure due to bad settings */
+        if ( !match_multi_add_cuts<DO_AREA>( n ) )
+          continue;
+
+        if constexpr ( DO_AREA )
+        {
+          bool multi_success = match_multioutput<DO_AREA>( n );
+          if ( multi_success )
+            multi_node_update<DO_AREA>( n );
+        }
+      }
+    }
+
+    double area_old = area;
+    bool success = set_mapping_refs<false>();
+
+    if ( warning_box )
+    {
+      std::cerr << "[i] MAP WARNING: not mapped don't touch gates are treated as sequential black boxes\n";
+    }
+
+    /* round stats */
+    if ( ps.verbose )
+    {
+      std::stringstream stats{};
+      float area_gain = 0.0f;
+
+      if ( iteration != 1 )
+        area_gain = float( ( area_old - area ) / area_old * 100 );
+
+      if constexpr ( DO_AREA )
+      {
+        stats << fmt::format( "[i] AreaFlow : Delay = {:>12.2f}  Area = {:>12.2f}  Gain = {:>5.2f} %  Inverters = {:>5}  Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) );
+      }
+      else
+      {
+        stats << fmt::format( "[i] Delay    : Delay = {:>12.2f}  Area = {:>12.2f}  Gain = {:>5.2f} %  Inverters = {:>5}  Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) );
+      }
+      st.round_stats.push_back( stats.str() );
+    }
+
+    return success;
+  }
+
+  template<bool DO_AREA>
+  void merge_cuts2( node<Ntk> const& n ) /* cut enumeration for a two-fanin node: merges every pair of fanin cuts into the cut set of `n` */
+  {
+    auto index = ntk.node_to_index( n );
+    /* every new cut is inserted with the AREA sort criterion */
+    emap_cut_sort_type sort = emap_cut_sort_type::AREA;
+
+    /* compute cuts: lcuts[0] and lcuts[1] are the fanin cut sets, lcuts[2] is the set of this node */
+    const auto fanin = 2;
+    ntk.foreach_fanin( ntk.index_to_node( index ), [this]( auto child, auto i ) {
+      lcuts[i] = &cuts[ntk.node_to_index( ntk.get_node( child ) )];
+    } );
+    lcuts[2] = &cuts[index];
+    auto& rcuts = *lcuts[fanin];
+
+    /* set cut limit for run-time optimization*/
+    rcuts.set_cut_limit( ps.cut_enumeration_ps.cut_limit );
+
+    cut_t new_cut;
+    fanin_cut_t vcuts;
+
+    for ( auto const& c1 : *lcuts[0] )
+    {
+      vcuts[0] = c1;
+      for ( auto const& c2 : *lcuts[1] )
+      {
+        /* skip pairs whose merge is rejected for the leaf limit CutSize */
+        if ( !c1->merge( *c2, new_cut, CutSize ) )
+        {
+          continue;
+        }
+
+        if ( ps.remove_dominated_cuts && rcuts.is_dominated( new_cut ) )
+        {
+          continue;
+        }
+
+        /* compute function */
+        vcuts[1] = c2;
+        compute_truth_table( index, vcuts, fanin, new_cut );
+
+        /* match cut and compute data */
+        compute_cut_data<DO_AREA>( new_cut, n );
+
+        if ( ps.remove_dominated_cuts )
+          rcuts.insert( new_cut, false, sort );
+        else
+          rcuts.simple_insert( new_cut, sort );
+      }
+    }
+
+    /* the statistic counts the cuts before the set is truncated */
+    cuts_total += rcuts.size();
+
+    /* limit the maximum number of cuts */
+    rcuts.limit( ps.cut_enumeration_ps.cut_limit );
+
+    /* add trivial cut */
+    if ( rcuts.size() > 1 || ( *rcuts.begin() )->size() > 1 )
+    {
+      add_unit_cut( index );
+    }
+  }
+
+  template<bool DO_AREA>
+  void merge_cuts( node<Ntk> const& n ) /* cut enumeration for a node with any number of fanins: merges one cut per fanin into the cut set of `n` */
+  {
+    auto index = ntk.node_to_index( n );
+    /* every new cut is inserted with the AREA sort criterion */
+    emap_cut_sort_type sort = emap_cut_sort_type::AREA;
+    /* lcuts[0 .. fanin-1] are the fanin cut sets, lcuts[fanin] is the set of this node */
+
+    /* compute cuts */
+    std::vector<uint32_t> cut_sizes;
+    ntk.foreach_fanin( ntk.index_to_node( index ), [this, &cut_sizes]( auto child, auto i ) {
+      lcuts[i] = &cuts[ntk.node_to_index( ntk.get_node( child ) )];
+      cut_sizes.push_back( static_cast<uint32_t>( lcuts[i]->size() ) );
+    } );
+    const auto fanin = cut_sizes.size();
+    lcuts[fanin] = &cuts[index];
+    auto& rcuts = *lcuts[fanin];
+
+    /* set cut limit for run-time optimization*/
+    rcuts.set_cut_limit( ps.cut_enumeration_ps.cut_limit );
+    fanin_cut_t vcuts;
+
+    if ( fanin > 1 && fanin <= ps.cut_enumeration_ps.fanin_limit )
+    {
+      cut_t new_cut, tmp_cut;
+
+      /* visit every combination of one cut per fanin; the callback always returns true */
+      foreach_mixed_radix_tuple( cut_sizes.begin(), cut_sizes.end(), [&]( auto begin, auto end ) {
+        auto it = vcuts.begin();
+        auto i = 0u;
+        while ( begin != end )
+        {
+          *it++ = &( ( *lcuts[i++] )[*begin++] );
+        }
+
+        if ( !vcuts[0]->merge( *vcuts[1], new_cut, CutSize ) )
+        {
+          return true; /* continue */
+        }
+
+        for ( i = 2; i < fanin; ++i )
+        {
+          tmp_cut = new_cut;
+          if ( !vcuts[i]->merge( tmp_cut, new_cut, CutSize ) )
+          {
+            return true; /* continue */
+          }
+        }
+
+        if ( ps.remove_dominated_cuts && rcuts.is_dominated( new_cut ) )
+        {
+          return true; /* continue */
+        }
+
+        compute_truth_table( index, vcuts, fanin, new_cut );
+
+        /* match cut and compute data */
+        compute_cut_data<DO_AREA>( new_cut, n );
+
+        if ( ps.remove_dominated_cuts )
+          rcuts.insert( new_cut, false, sort );
+        else
+          rcuts.simple_insert( new_cut, sort );
+
+        return true;
+      } );
+
+      /* limit the maximum number of cuts */
+      rcuts.limit( ps.cut_enumeration_ps.cut_limit );
+    }
+    else if ( fanin == 1 )
+    {
+      /* single fanin: every fanin cut is copied and gets its function recomputed for this node */
+      for ( auto const& cut : *lcuts[0] )
+      {
+        cut_t new_cut = *cut;
+        vcuts[0] = cut;
+
+        compute_truth_table( index, vcuts, fanin, new_cut );
+
+        /* match cut and compute data */
+        compute_cut_data<DO_AREA>( new_cut, n );
+
+        if ( ps.remove_dominated_cuts )
+          rcuts.insert( new_cut, false, sort );
+        else
+          rcuts.simple_insert( new_cut, sort );
+      }
+
+      /* limit the maximum number of cuts */
+      rcuts.limit( ps.cut_enumeration_ps.cut_limit );
+    }
+
+    cuts_total += rcuts.size();
+
+    add_unit_cut( index );
+  }
+
+  template<bool DO_AREA>
+  bool compute_mapping_match_node() /* first pass of the node-map flow: each gate gets a single cut made of its direct fanins; returns the result of set_mapping_refs<false>() */
+  {
+    for ( auto const& n : topo_order )
+    {
+      auto const index = ntk.node_to_index( n );
+      auto& node_data = node_match[index];
+
+      /* reset the record; the member is named `best_supergate` in node_match_emap */
+      node_data.best_supergate[0] = node_data.best_supergate[1] = nullptr;
+      node_data.same_match = 0;
+      node_data.multioutput_match[0] = node_data.multioutput_match[1] = false;
+      node_data.required[0] = node_data.required[1] = std::numeric_limits<float>::max();
+      node_data.map_refs[0] = node_data.map_refs[1] = node_data.map_refs[2] = 0;
+      node_data.est_refs[0] = node_data.est_refs[1] = node_data.est_refs[2] = static_cast<float>( ntk.fanout_size( n ) );
+
+      if ( ntk.is_constant( n ) )
+      {
+        /* all terminals have flow 0 */
+        node_data.flows[0] = node_data.flows[1] = 0.0f;
+        node_data.arrival[0] = node_data.arrival[1] = 0.0f;
+        add_zero_cut( index );
+        match_constants( index );
+        continue;
+      }
+      else if ( ntk.is_pi( n ) )
+      {
+        /* all terminals have flow 0 */
+        node_data.flows[0] = node_data.flows[1] = 0.0f;
+        node_data.arrival[0] = 0.0f;
+        /* PIs have the negative phase implemented with an inverter */
+        node_data.arrival[1] = lib_inv_delay;
+        add_unit_cut( index );
+        continue;
+      }
+
+      /* compute the node mapping */
+      add_node_cut<DO_AREA>( n );
+
+      /* match positive phase */
+      match_phase<DO_AREA>( n, 0u );
+
+      /* match negative phase */
+      match_phase<DO_AREA>( n, 1u );
+
+      /* try to drop one phase */
+      match_drop_phase<DO_AREA, false>( n, 0 );
+    }
+    double area_old = area;
+    bool success = set_mapping_refs<false>();
+
+    /* round stats */
+    if ( ps.verbose )
+    {
+      std::stringstream stats{};
+      float area_gain = 0.0f;
+
+      if ( iteration != 1 )
+        area_gain = float( ( area_old - area ) / area_old * 100 );
+
+      if constexpr ( DO_AREA )
+      {
+        stats << fmt::format( "[i] AreaFlow : Delay = {:>12.2f}  Area = {:>12.2f}  Gain = {:>5.2f} %  Inverters = {:>5}  Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) );
+      }
+      else
+      {
+        stats << fmt::format( "[i] Delay    : Delay = {:>12.2f}  Area = {:>12.2f}  Gain = {:>5.2f} %  Inverters = {:>5}  Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) );
+      }
+      st.round_stats.push_back( stats.str() );
+    }
+
+    return success;
+  }
+
+  template<bool DO_AREA>
+  void add_node_cut( node<Ntk> const& n ) /* adds one cut whose leaves are the direct fanins of `n`, sets its function from the node function, and matches it */
+  {
+    auto index = ntk.node_to_index( n );
+    /* bind the cut set itself: the address of the set is a temporary pointer that `auto&` cannot bind to */
+    auto& rcuts = cuts[index];
+
+    std::vector<uint32_t> fanin_indexes;
+    fanin_indexes.reserve( Ntk::max_fanin_size );
+
+    ntk.foreach_fanin( n, [&]( auto const& f ) {
+      fanin_indexes.push_back( ntk.node_to_index( ntk.get_node( f ) ) );
+    } );
+
+    assert( fanin_indexes.size() <= CutSize );
+    /* presumably add_cut() returns a reference to the cut stored in the set (confirm): auto&& keeps the writes below in that cut, not in a copy */
+    auto&& new_cut = rcuts.add_cut( fanin_indexes.begin(), fanin_indexes.end() );
+    new_cut->function = kitty::extend_to<6>( ntk.node_function( n ) );
+
+    /* match cut and compute data */
+    compute_cut_data<DO_AREA>( new_cut, n );
+
+    ++cuts_total;
+  }
+
+  template<bool DO_AREA>
+  bool compute_mapping() /* one re-mapping round over the existing cuts in topological order; returns the result of set_mapping_refs<false>() */
+  {
+    for ( auto const& n : topo_order )
+    {
+      uint32_t index = ntk.node_to_index( n );
+
+      /* reset mapping: the reference counters are cleared for every node, including PIs and constants */
+      node_match[index].map_refs[0] = node_match[index].map_refs[1] = node_match[index].map_refs[2] = 0u;
+
+      if ( ntk.is_constant( n ) || ntk.is_pi( n ) )
+        continue;
+
+      /* don't touch box: not re-matched; with bindings available, its data is propagated forward */
+      if constexpr ( has_is_dont_touch_v<Ntk> )
+      {
+        if ( ntk.is_dont_touch( n ) )
+        {
+          if constexpr ( has_has_binding_v<Ntk> )
+          {
+            propagate_data_forward_white_box( n );
+          }
+          continue;
+        }
+      }
+
+      /* match positive phase */
+      match_phase<DO_AREA>( n, 0u );
+
+      /* match negative phase */
+      match_phase<DO_AREA>( n, 1u );
+
+      /* try to drop one phase */
+      match_drop_phase<DO_AREA, false>( n, 0 );
+
+      /* try a multi-output match (only in the area-oriented instantiation) */
+      if constexpr ( DO_AREA )
+      {
+        if ( ps.map_multioutput && node_tuple_match[index] != UINT32_MAX )
+        {
+          bool multi_success = match_multioutput<DO_AREA>( n );
+          if ( multi_success )
+            multi_node_update<DO_AREA>( n );
+        }
+      }
+
+      assert( node_match[index].arrival[0] < node_match[index].required[0] + epsilon );
+      assert( node_match[index].arrival[1] < node_match[index].required[1] + epsilon );
+    }
+
+    double area_old = area;
+    bool success = set_mapping_refs<false>();
+
+    /* round stats */
+    if ( ps.verbose )
+    {
+      std::stringstream stats{};
+      float area_gain = 0.0f;
+
+      if ( iteration != 1 )
+        area_gain = float( ( area_old - area ) / area_old * 100 );
+
+      if constexpr ( DO_AREA )
+      {
+        stats << fmt::format( "[i] AreaFlow : Delay = {:>12.2f}  Area = {:>12.2f}  Gain = {:>5.2f} %  Inverters = {:>5}  Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) );
+      }
+      else
+      {
+        stats << fmt::format( "[i] Delay    : Delay = {:>12.2f}  Area = {:>12.2f}  Gain = {:>5.2f} %  Inverters = {:>5}  Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) );
+      }
+      st.round_stats.push_back( stats.str() );
+    }
+
+    return success;
+  }
+
+  template<bool SwitchActivity>
+  bool compute_mapping_exact( bool last_round ) /* one exact round in topological order; `last_round` is forwarded to match_multioutput_exact; returns the result of set_mapping_refs<true>() */
+  {
+    for ( auto const& n : topo_order )
+    {
+      if ( ntk.is_constant( n ) || ntk.is_pi( n ) )
+        continue;
+
+      /* don't touch box: not re-matched; with bindings available, its data is propagated forward */
+      if constexpr ( has_is_dont_touch_v<Ntk> )
+      {
+        if ( ntk.is_dont_touch( n ) )
+        {
+          if constexpr ( has_has_binding_v<Ntk> )
+          {
+            propagate_data_forward_white_box( n );
+          }
+          continue;
+        }
+      }
+
+      auto index = ntk.node_to_index( n );
+      auto& node_data = node_match[index];
+
+      /* recursively deselect the best cut shared between
+       * the two phases if in use in the cover */
+      if ( node_data.same_match && node_data.map_refs[2] != 0 )
+      {
+        uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 0 : 1;
+        auto const& best_cut = cuts[index][node_data.best_cut[use_phase]];
+        cut_deref<SwitchActivity>( best_cut, n, use_phase );
+      }
+
+      /* match positive phase */
+      match_phase_exact<SwitchActivity>( n, 0u );
+
+      /* match negative phase */
+      match_phase_exact<SwitchActivity>( n, 1u );
+
+      /* try to drop one phase */
+      match_drop_phase<true, true>( n, 0 );
+
+      /* try a multi-output match */
+      if ( ps.map_multioutput && node_tuple_match[index] != UINT32_MAX )
+      {
+        bool multi_success = match_multioutput_exact<SwitchActivity>( n, last_round );
+        if ( multi_success )
+          multi_node_update_exact<SwitchActivity>( n );
+      }
+
+      if ( node_match[index].map_refs[0] )
+        assert( node_match[index].arrival[0] < node_match[index].required[0] + epsilon );
+      if ( node_match[index].map_refs[1] )
+        assert( node_match[index].arrival[1] < node_match[index].required[1] + epsilon );
+    }
+
+    double area_old = area;
+    bool success = set_mapping_refs<true>();
+
+    /* round stats */
+    if ( ps.verbose )
+    {
+      float area_gain = float( ( area_old - area ) / area_old * 100 );
+      std::stringstream stats{};
+      if constexpr ( SwitchActivity )
+        stats << fmt::format( "[i] Switching: Delay = {:>12.2f}  Area = {:>12.2f}  Gain = {:>5.2f} %  Inverters = {:>5}  Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) );
+      else
+        stats << fmt::format( "[i] Area     : Delay = {:>12.2f}  Area = {:>12.2f}  Gain = {:>5.2f} %  Inverters = {:>5}  Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) );
+      st.round_stats.push_back( stats.str() );
+    }
+
+    return success;
+  }
+
+  template<bool SwitchActivity>
+  bool compute_mapping_exact_reversed( bool last_round ) /* one exact round in reverse topological order; always returns true */
+  {
+    /* this method works in reverse topological order: fewer nodes to update (faster) */
+    /* instead of propagating arrival times forward, it propagates required times backwards */
+
+    for ( auto it = topo_order.rbegin(); it != topo_order.rend(); ++it )
+    {
+      if ( ntk.is_constant( *it ) || ntk.is_pi( *it ) )
+        continue;
+
+      const auto index = ntk.node_to_index( *it );
+      auto& node_data = node_match[index];
+
+      /* skip not mapped nodes */
+      if ( node_match[index].map_refs[2] == 0 )
+        continue;
+
+      /* don't touch box: not re-matched; with bindings available, its data is propagated backward */
+      if constexpr ( has_is_dont_touch_v<Ntk> )
+      {
+        node<Ntk> n = ntk.index_to_node( index );
+        if ( ntk.is_dont_touch( n ) )
+        {
+          if constexpr ( has_has_binding_v<Ntk> )
+          {
+            propagate_data_backward_white_box( n );
+          }
+          continue;
+        }
+      }
+
+      /* recursively deselect the best cut shared between
+       * the two phases if in use in the cover */
+      uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 0 : 1;
+      double old_required = -1; /* -1 = nothing to restore; NOTE(review): the restore below tests `> 0`, so a saved value <= 0 would not be written back; confirm this cannot occur */
+      if ( node_data.same_match )
+      {
+        auto const& best_cut = cuts[index][node_data.best_cut[use_phase]];
+        cut_deref<SwitchActivity>( best_cut, *it, use_phase );
+
+        /* propagate required time over the output inverter if present */
+        if ( node_data.map_refs[use_phase ^ 1] > 0 )
+        {
+          old_required = node_data.required[use_phase];
+          node_data.required[use_phase] = std::min( node_data.required[use_phase], node_data.required[use_phase ^ 1] - lib_inv_delay );
+        }
+      }
+
+      /* match positive phase */
+      match_phase_exact<SwitchActivity>( *it, 0u );
+
+      /* match negative phase */
+      match_phase_exact<SwitchActivity>( *it, 1u );
+
+      /* restore required time */
+      if ( old_required > 0 )
+      {
+        node_data.required[use_phase] = old_required;
+      }
+
+      /* try to drop one phase */
+      match_drop_phase<true, true>( *it, 0 );
+
+      /* try a multi-output match; note that the bound differs from the `!= UINT32_MAX` test used by the forward passes */
+      if ( ps.map_multioutput && node_tuple_match[index] < UINT32_MAX - 1 )
+      {
+        match_multioutput_exact<SwitchActivity>( *it, true ); /* NOTE(review): `last_round` is not read in this body and `true` is always passed here; confirm this is intended */
+
+        /* propagate required time for the selected gates */
+        match_multioutput_propagate_required( *it );
+      }
+      else
+      {
+        match_propagate_required( index );
+      }
+    }
+
+    double area_old = area;
+
+    propagate_arrival_times();
+
+    /* round stats */
+    if ( ps.verbose )
+    {
+      float area_gain = float( ( area_old - area ) / area_old * 100 );
+      std::stringstream stats{};
+      if constexpr ( SwitchActivity )
+        stats << fmt::format( "[i] Switching: Delay = {:>12.2f}  Area = {:>12.2f}  Gain = {:>5.2f} %  Inverters = {:>5}  Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) );
+      else
+        stats << fmt::format( "[i] Area Rev : Delay = {:>12.2f}  Area = {:>12.2f}  Gain = {:>5.2f} %  Inverters = {:>5}  Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) );
+      st.round_stats.push_back( stats.str() );
+    }
+
+    return true;
+  }
+
+  inline void match_propagate_required( uint32_t index ) /* tightens the required times of the best-cut leaves of node `index` from the node's own required times */
+  {
+    /* don't touch box: handled by the white-box routine when bindings are available, nothing else is propagated */
+    if constexpr ( has_is_dont_touch_v<Ntk> )
+    {
+      node<Ntk> n = ntk.index_to_node( index );
+      if ( ntk.is_dont_touch( n ) )
+      {
+        if constexpr ( has_has_binding_v<Ntk> )
+        {
+          propagate_data_backward_white_box( n );
+        }
+        return;
+      }
+    }
+
+    auto& node_data = node_match[index];
+
+    /* propagate required time through the leaves; use_phase is 0 unless only the negative phase has a match */
+    unsigned use_phase = node_data.best_supergate[0] == nullptr ? 1u : 0u;
+    unsigned other_phase = use_phase ^ 1;
+
+    assert( node_data.best_supergate[0] != nullptr || node_data.best_supergate[1] != nullptr );
+    // assert( node_data.map_refs[0] || node_data.map_refs[1] );
+
+    /* propagate required time over the output inverter if present */
+    if ( node_data.same_match && node_data.map_refs[use_phase ^ 1] > 0 )
+    {
+      node_data.required[use_phase] = std::min( node_data.required[use_phase], node_data.required[other_phase] - lib_inv_delay );
+    }
+
+    if ( node_data.map_refs[0] )
+      assert( node_data.arrival[0] < node_data.required[0] + epsilon );
+    if ( node_data.map_refs[1] )
+      assert( node_data.arrival[1] < node_data.required[1] + epsilon );
+
+    if ( node_data.same_match || node_data.map_refs[use_phase] > 0 )
+    {
+      auto ctr = 0u; /* position of the leaf in the cut: selects the phase bit and the pin delay */
+      auto const& best_cut = cuts[index][node_data.best_cut[use_phase]];
+      auto const& supergate = node_data.best_supergate[use_phase];
+      for ( auto leaf : best_cut )
+      {
+        auto phase = ( node_data.phase[use_phase] >> ctr ) & 1;
+        node_match[leaf].required[phase] = std::min( node_match[leaf].required[phase], node_data.required[use_phase] - supergate->tdelay[ctr] );
+        ++ctr;
+      }
+    }
+
+    if ( !node_data.same_match && node_data.map_refs[other_phase] > 0 ) /* the other phase has its own match: propagate through its cut as well */
+    {
+      auto ctr = 0u;
+      auto const& best_cut = cuts[index][node_data.best_cut[other_phase]];
+      auto const& supergate = node_data.best_supergate[other_phase];
+      for ( auto leaf : best_cut )
+      {
+        auto phase = ( node_data.phase[other_phase] >> ctr ) & 1;
+        node_match[leaf].required[phase] = std::min( node_match[leaf].required[phase], node_data.required[other_phase] - supergate->tdelay[ctr] );
+        ++ctr;
+      }
+    }
+  }
+
+  template<bool ELA> /* ELA == true: measure only; mapping references and their estimates are left untouched */
+  bool set_mapping_refs() /* returns false, after setting st.mapping_error, when a used node has no gate */
+  {
+    /* compute the current worst delay over the POs and, unless ELA, reference the PO drivers */
+    delay = 0.0f;
+    ntk.foreach_po( [this]( auto s ) {
+      const auto index = ntk.node_to_index( ntk.get_node( s ) );
+
+      if ( ntk.is_complemented( s ) )
+        delay = std::max( delay, node_match[index].arrival[1] );
+      else
+        delay = std::max( delay, node_match[index].arrival[0] );
+
+      if constexpr ( !ELA )
+      {
+        node_match[index].map_refs[2]++; /* slot 2 is bumped together with the per-phase slot 0 or 1 */
+        if ( ntk.is_complemented( s ) )
+          node_match[index].map_refs[1]++;
+        else
+          node_match[index].map_refs[0]++;
+      }
+    } );
+
+    /* recompute area and inverter count, updating mapping refs (unless ELA), in reverse topological order */
+    area = 0.0f;
+    inv = 0;
+    for ( auto it = topo_order.rbegin(); it != topo_order.rend(); ++it )
+    {
+      const auto index = ntk.node_to_index( *it );
+      auto& node_data = node_match[index];
+
+      /* constants and PIs are not matched below: validate or charge them, then skip */
+      if ( ntk.is_constant( *it ) )
+      {
+        if ( node_match[index].map_refs[2] > 0u )
+        {
+          /* a referenced constant with no gate on either phase is reported as a mapping error */
+          if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr )
+          {
+            std::cerr << "[i] MAP ERROR: technology library does not contain constant gates, impossible to perform mapping" << std::endl;
+            st.mapping_error = true;
+            return false;
+          }
+        }
+        continue;
+      }
+      else if ( ntk.is_pi( *it ) )
+      {
+        if ( node_match[index].map_refs[1] > 0u )
+        {
+          /* a PI referenced in negative phase costs one inverter */
+          area += lib_inv_area;
+          ++inv;
+        }
+        continue;
+      }
+
+      /* nodes with no reference in the cover contribute nothing */
+      if ( node_match[index].map_refs[2] == 0u )
+        continue;
+
+      /* don't touch boxes are referenced and measured by a dedicated routine */
+      if constexpr ( has_is_dont_touch_v<Ntk> )
+      {
+        if ( ntk.is_dont_touch( *it ) )
+        {
+          set_mapping_refs_dont_touch<ELA>( *it );
+          continue;
+        }
+      }
+
+      unsigned use_phase = node_data.best_supergate[0] == nullptr ? 1u : 0u; /* start from phase 0 when it has a gate */
+
+      if ( node_data.best_supergate[use_phase] == nullptr )
+      {
+        /* neither phase has a gate: the node cannot be implemented with this library */
+        std::cerr << "[i] MAP ERROR: technology library is not complete, impossible to perform mapping" << std::endl;
+        st.mapping_error = true;
+        return false;
+      }
+
+      if ( node_data.same_match || node_data.map_refs[use_phase] > 0 )
+      {
+        if constexpr ( !ELA )
+        {
+          auto const& best_cut = cuts[index][node_data.best_cut[use_phase]];
+          auto ctr = 0u;
+
+          for ( auto const leaf : best_cut ) /* bit ctr of phase[use_phase] selects the leaf phase to reference */
+          {
+            node_match[leaf].map_refs[2]++;
+            if ( ( node_data.phase[use_phase] >> ctr++ ) & 1 )
+              node_match[leaf].map_refs[1]++;
+            else
+              node_match[leaf].map_refs[0]++;
+          }
+        }
+        area += node_data.area[use_phase];
+        if ( node_data.same_match && node_data.map_refs[use_phase ^ 1] > 0 ) /* shared match: other phase adds an inverter */
+        {
+          area += lib_inv_area;
+          ++inv;
+        }
+      }
+
+      /* switch to the opposite phase */
+      use_phase = use_phase ^ 1;
+
+      /* the opposite phase has its own match and is referenced: account for it too */
+      if ( !node_data.same_match && node_data.map_refs[use_phase] > 0 )
+      {
+        if constexpr ( !ELA )
+        {
+          auto const& best_cut = cuts[index][node_data.best_cut[use_phase]];
+
+          auto ctr = 0u;
+          for ( auto const leaf : best_cut )
+          {
+            node_match[leaf].map_refs[2]++;
+            if ( ( node_data.phase[use_phase] >> ctr++ ) & 1 )
+              node_match[leaf].map_refs[1]++;
+            else
+              node_match[leaf].map_refs[0]++;
+          }
+        }
+        area += node_data.area[use_phase];
+      }
+    }
+
+    ++iteration;
+
+    if constexpr ( ELA )
+    {
+      return true;
+    }
+
+    /* blend estimated references: (old estimate + 2 * measured refs) / 3, never below 1 */
+    for ( auto i = 0u; i < ntk.size(); ++i )
+    {
+      node_match[i].est_refs[2] = std::max( 1.0, ( 1.0 * node_match[i].est_refs[2] + 2.0f * node_match[i].map_refs[2] ) / 3.0 );
+      node_match[i].est_refs[1] = std::max( 1.0, ( 1.0 * node_match[i].est_refs[1] + 2.0f * node_match[i].map_refs[1] ) / 3.0 );
+      node_match[i].est_refs[0] = std::max( 1.0, ( 1.0 * node_match[i].est_refs[0] + 2.0f * node_match[i].map_refs[0] ) / 3.0 );
+    }
+
+    return true;
+  }
+
+  template<bool ELA>
+  inline void set_mapping_refs_dont_touch( node<Ntk> const& n )
+  {
+    /* unless ELA, every fanin of the box gains one reference on the phase it is used in */
+    if constexpr ( !ELA )
+    {
+      ntk.foreach_fanin( n, [&]( auto const& fanin ) {
+        auto& fanin_data = node_match[ntk.node_to_index( ntk.get_node( fanin ) )];
+        ++fanin_data.map_refs[2];
+        ++fanin_data.map_refs[ntk.is_complemented( fanin ) ? 1 : 0];
+      } );
+    }
+
+    /* area is accumulated only for networks that expose gate bindings */
+    if constexpr ( has_has_binding_v<Ntk> )
+    {
+      auto const& box_data = node_match[ntk.node_to_index( n )];
+      area += box_data.area[0];
+
+      /* a box referenced in negative phase costs one extra inverter */
+      if ( box_data.map_refs[1] == 0 )
+        return;
+
+      area += lib_inv_area;
+      ++inv;
+    }
+  }
+
+  void compute_required_time( bool exit_early = false )
+  {
+    /* every node starts unconstrained on both phases */
+    constexpr float unconstrained = std::numeric_limits<float>::max();
+    for ( auto k = 0u; k < node_match.size(); ++k )
+    {
+      node_match[k].required[0] = unconstrained;
+      node_match[k].required[1] = unconstrained;
+    }
+
+    /* area-oriented mapping leaves all the required times unconstrained */
+    if ( ps.area_oriented_mapping )
+      return;
+
+    /* by default the target is the worst delay of the current cover */
+    double target = delay;
+
+    if ( ps.required_time != 0.0f )
+    {
+      /* global target time constraint: adopted only if the current cover can meet it */
+      if ( ps.required_time < delay - epsilon )
+      {
+        /* area-oriented mapping already returned above, so only the iteration is checked */
+        if ( iteration == 1 )
+          std::cerr << fmt::format( "[i] MAP WARNING: cannot meet the target required time of {:.2f}", ps.required_time ) << std::endl;
+      }
+      else
+      {
+        target = ps.required_time;
+      }
+    }
+    else if ( iteration == 1 && ps.relax_required > 0.0f )
+    {
+      /* no global constraint: relax the delay by a percentage on the first iteration */
+      target *= ( 100.0 + ps.relax_required ) / 100.0;
+    }
+
+    /* set the required time at the POs on the phase each of them uses */
+    ntk.foreach_po( [&]( auto const& po ) {
+      auto& po_data = node_match[ntk.node_to_index( ntk.get_node( po ) )];
+      po_data.required[ntk.is_complemented( po ) ? 1 : 0] = target;
+    } );
+
+    /* the caller may only need the PO constraints */
+    if ( exit_early )
+      return;
+
+    /* propagate required time towards the PIs in reverse topological order */
+    for ( auto rit = topo_order.rbegin(); rit != topo_order.rend(); ++rit )
+    {
+      /* stop at the first PI or constant met going backwards */
+      if ( ntk.is_pi( *rit ) || ntk.is_constant( *rit ) )
+        break;
+      /* only nodes referenced in the cover propagate their constraint */
+      const auto index = ntk.node_to_index( *rit );
+      if ( node_match[index].map_refs[2] != 0 )
+        match_propagate_required( index );
+    }
+  }
+
+  void propagate_arrival_times() /* recomputes arrivals, area, inverter count and worst delay of the current selection */
+  {
+    area = 0.0f;
+    inv = 0;
+    for ( auto const& n : topo_order )
+    {
+      auto index = ntk.node_to_index( n );
+      auto& node_data = node_match[index];
+
+      /* constants are skipped; PIs only contribute inverter area */
+      if ( ntk.is_constant( n ) )
+      {
+        continue;
+      }
+      else if ( ntk.is_pi( n ) )
+      {
+        if ( node_data.map_refs[1] > 0u )
+        {
+          /* a PI referenced in negative phase costs one inverter */
+          area += lib_inv_area;
+          ++inv;
+        }
+        continue;
+      }
+
+      /* reset required time of internal nodes on both phases */
+      node_data.required[0] = std::numeric_limits<float>::max();
+      node_data.required[1] = std::numeric_limits<float>::max();
+
+      /* don't touch boxes keep their binding: only refresh their data and area */
+      if constexpr ( has_is_dont_touch_v<Ntk> )
+      {
+        node<Ntk> n = ntk.index_to_node( index ); /* NOTE(review): shadows the loop variable n; presumably the same node - confirm */
+        if ( ntk.is_dont_touch( n ) )
+        {
+          if constexpr ( has_has_binding_v<Ntk> )
+          {
+            propagate_data_forward_white_box( n );
+            if ( node_data.map_refs[2] )
+              area += node_data.area[0];
+            if ( node_data.map_refs[1] )
+            {
+              area += lib_inv_area;
+              ++inv;
+            }
+          }
+          continue;
+        }
+      }
+
+      uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 0 : 1; /* phase 0 first when it has a gate */
+
+      /* compute arrival of use_phase: worst leaf arrival plus pin delay over the selected cut */
+      supergate<NInputs> const* best_supergate = node_data.best_supergate[use_phase];
+      double worst_arrival = 0;
+      uint16_t best_phase = node_data.phase[use_phase];
+      auto ctr = 0u;
+      for ( auto l : cuts[index][node_data.best_cut[use_phase]] )
+      {
+        double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr];
+        worst_arrival = std::max( worst_arrival, arrival_pin );
+        ++ctr;
+      }
+
+      node_data.arrival[use_phase] = worst_arrival;
+
+      /* count the gate area if this match is in use, plus an inverter when it also serves the other phase */
+      if ( ( node_data.map_refs[2] && node_data.same_match ) || node_data.map_refs[use_phase] > 0 )
+      {
+        area += node_data.area[use_phase];
+        if ( node_data.same_match && node_data.map_refs[use_phase ^ 1] > 0 )
+        {
+          area += lib_inv_area;
+          ++inv;
+        }
+      }
+
+      /* compute arrival of the other phase: one inverter delay later when the match is shared */
+      use_phase ^= 1;
+      if ( node_data.same_match )
+      {
+        node_data.arrival[use_phase] = worst_arrival + lib_inv_delay;
+        continue;
+      }
+
+      assert( node_data.best_supergate[use_phase] != nullptr );
+
+      best_supergate = node_data.best_supergate[use_phase];
+      worst_arrival = 0;
+      best_phase = node_data.phase[use_phase];
+      ctr = 0u;
+      for ( auto l : cuts[index][node_data.best_cut[use_phase]] )
+      {
+        double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr];
+        worst_arrival = std::max( worst_arrival, arrival_pin );
+        ++ctr;
+      }
+
+      node_data.arrival[use_phase] = worst_arrival;
+
+      if ( node_data.map_refs[use_phase] > 0 )
+      {
+        area += node_data.area[use_phase];
+      }
+    }
+
+    /* compute the current worst delay over the POs, on the phase each PO uses */
+    delay = 0.0f;
+    ntk.foreach_po( [this]( auto s ) {
+      const auto index = ntk.node_to_index( ntk.get_node( s ) );
+
+      if ( ntk.is_complemented( s ) )
+        delay = std::max( delay, node_match[index].arrival[1] );
+      else
+        delay = std::max( delay, node_match[index].arrival[0] );
+    } );
+
+    /* count the iteration; area-oriented mapping sets no required time at the POs */
+    ++iteration;
+    if ( ps.area_oriented_mapping )
+      return;
+
+    /* set the required time at POs to the worst delay just measured */
+    ntk.foreach_po( [&]( auto const& s ) {
+      const auto index = ntk.node_to_index( ntk.get_node( s ) );
+      if ( ntk.is_complemented( s ) )
+        node_match[index].required[1] = delay;
+      else
+        node_match[index].required[0] = delay;
+    } );
+  }
+
+  void propagate_arrival_node( node<Ntk> const& n ) /* recomputes arrival[0] and arrival[1] of n from its selected cuts */
+  {
+    uint32_t index = ntk.node_to_index( n );
+    auto& node_data = node_match[index];
+    uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 0 : 1; /* phase 0 first when it has a gate */
+
+    /* compute arrival of use_phase: worst leaf arrival plus pin delay over the selected cut */
+    supergate<NInputs> const* best_supergate = node_data.best_supergate[use_phase];
+    double worst_arrival = 0;
+    uint16_t best_phase = node_data.phase[use_phase];
+    auto ctr = 0u;
+    for ( auto l : cuts[index][node_data.best_cut[use_phase]] )
+    {
+      double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr];
+      worst_arrival = std::max( worst_arrival, arrival_pin );
+      ++ctr;
+    }
+    node_data.arrival[use_phase] = worst_arrival;
+
+    /* compute arrival of the other phase: one inverter delay later when the match is shared */
+    use_phase ^= 1;
+    if ( node_data.same_match )
+    {
+      node_data.arrival[use_phase] = worst_arrival + lib_inv_delay;
+      return;
+    }
+    /* separate matches: the gate of the flipped phase is dereferenced below, so it is the one to check */
+    assert( node_data.best_supergate[use_phase] != nullptr );
+
+    best_supergate = node_data.best_supergate[use_phase];
+    worst_arrival = 0;
+    best_phase = node_data.phase[use_phase];
+    ctr = 0u;
+    for ( auto l : cuts[index][node_data.best_cut[use_phase]] )
+    {
+      double arrival_pin = node_match[l].arrival[( best_phase >> ctr ) & 1] + best_supergate->tdelay[ctr];
+      worst_arrival = std::max( worst_arrival, arrival_pin );
+      ++ctr;
+    }
+
+    node_data.arrival[use_phase] = worst_arrival;
+  }
+
+  template<bool DO_AREA> /* DO_AREA: also reject gates whose arrival violates the required time */
+  void match_phase( node<Ntk> const& n, uint8_t phase ) /* selects the best cut and gate for one phase of node n */
+  {
+    double best_arrival = std::numeric_limits<float>::max();
+    double best_area_flow = std::numeric_limits<float>::max();
+    float best_area = std::numeric_limits<float>::max();
+    uint32_t best_size = UINT32_MAX;
+    uint8_t best_cut = 0u;
+    uint16_t best_phase = 0u;
+    uint8_t cut_index = 0u;
+    auto index = ntk.node_to_index( n );
+
+    auto& node_data = node_match[index];
+    supergate<NInputs> const* best_supergate = node_data.best_supergate[phase]; /* kept if no candidate wins below */
+
+    /* a multi-output match on this phase is dropped before re-matching */
+    if ( node_data.multioutput_match[phase] )
+    {
+      best_supergate = nullptr;
+      node_data.multioutput_match[phase] = false;
+    }
+
+    /* foreach cut of the node; cut_index tracks the position of the cut in the set */
+    for ( auto& cut : cuts[index] )
+    {
+      /* skip cuts flagged as ignored */
+      if ( ( *cut )->ignore )
+      {
+        ++cut_index;
+        continue;
+      }
+
+      auto const& supergates = ( *cut )->supergates;
+      auto const negation = ( *cut )->negations[phase];
+
+      if ( supergates[phase] == nullptr ) /* no gate list for this phase on this cut */
+      {
+        ++cut_index;
+        continue;
+      }
+
+      /* evaluate each gate of the list and keep the best one according to compare_map */
+      for ( auto const& gate : *supergates[phase] )
+      {
+        uint16_t gate_polarity = gate.polarity ^ negation; /* bit i gives the phase used for leaf i */
+        double worst_arrival = 0.0f;
+        double area_local = gate.area;
+
+        auto ctr = 0u;
+        node_data.phase[phase] = gate_polarity;
+        for ( auto l : *cut )
+        {
+          double arrival_pin = node_match[l].arrival[( gate_polarity >> ctr ) & 1] + gate.tdelay[ctr];
+          worst_arrival = std::max( worst_arrival, arrival_pin );
+
+          uint8_t leaf_phase = ( node_data.phase[phase] >> ctr ) & 1;
+          area_local += node_match[l].flows[leaf_phase]; /* area flow: gate area plus the flows of the leaves */
+          ++ctr;
+        }
+
+        if constexpr ( DO_AREA )
+        {
+          if ( worst_arrival > node_data.required[phase] + epsilon || worst_arrival >= std::numeric_limits<float>::max() )
+            continue;
+        }
+
+        node_data.phase[phase] = gate_polarity;
+
+        if ( compare_map<DO_AREA>( worst_arrival, best_arrival, area_local, best_area_flow, cut->size(), best_size ) )
+        {
+          best_arrival = worst_arrival;
+          best_area_flow = area_local;
+          best_size = cut->size();
+          best_cut = cut_index;
+          best_area = gate.area;
+          best_phase = gate_polarity;
+          best_supergate = &gate;
+        }
+      }
+
+      ++cut_index;
+    }
+
+    node_data.flows[phase] = best_area_flow; /* commit the selection; phase[phase] is overwritten with the winner */
+    node_data.arrival[phase] = best_arrival;
+    node_data.area[phase] = best_area;
+    node_data.best_cut[phase] = best_cut;
+    node_data.phase[phase] = best_phase;
+    node_data.best_supergate[phase] = best_supergate;
+  }
+
+  template<bool SwitchActivity> /* SwitchActivity is forwarded to cut_deref, cut_measure_mffc and cut_ref */
+  void match_phase_exact( node<Ntk> const& n, uint8_t phase ) /* re-matches one phase of n using the measured cut cost */
+  {
+    double best_arrival = std::numeric_limits<float>::max();
+    float best_exact_area = std::numeric_limits<float>::max();
+    float best_area = std::numeric_limits<float>::max();
+    uint32_t best_size = UINT32_MAX;
+    uint8_t best_cut = 0u;
+    uint16_t best_phase = 0u;
+    uint8_t cut_index = 0u;
+    auto index = ntk.node_to_index( n );
+
+    auto& node_data = node_match[index];
+    supergate<NInputs> const* best_supergate = node_data.best_supergate[phase];
+
+    /* a multi-output match on this phase is dropped before re-matching */
+    if ( node_data.multioutput_match[phase] )
+    {
+      /* if it is implemented on its own and referenced, dereference its cut taken from multi_cut_set */
+      if ( !node_data.same_match && best_supergate != nullptr && node_data.map_refs[phase] )
+      {
+        auto const& cut = multi_cut_set[node_data.best_cut[phase]][0];
+        cut_deref<SwitchActivity>( cut, n, phase );
+      }
+      best_supergate = nullptr;
+      node_data.multioutput_match[phase] = false;
+    }
+
+    /* a surviving single-output match is taken out of the cover while candidates are measured */
+    if ( best_supergate != nullptr )
+    {
+      /* only a referenced match that is not shared between phases is dereferenced */
+      if ( !node_data.same_match && node_data.map_refs[phase] )
+      {
+        auto const& cut = cuts[index][node_data.best_cut[phase]];
+        cut_deref<SwitchActivity>( cut, n, phase );
+      }
+    }
+
+    /* foreach cut of the node; cut_index tracks the position of the cut in the set */
+    for ( auto& cut : cuts[index] )
+    {
+      /* skip cuts flagged as ignored */
+      if ( ( *cut )->ignore )
+      {
+        ++cut_index;
+        continue;
+      }
+
+      auto const& supergates = ( *cut )->supergates;
+      auto const negation = ( *cut )->negations[phase];
+
+      if ( supergates[phase] == nullptr ) /* no gate list for this phase on this cut */
+      {
+        ++cut_index;
+        continue;
+      }
+
+      /* evaluate each gate of the list and keep the best one according to compare_map<true> */
+      for ( auto const& gate : *supergates[phase] )
+      {
+        uint16_t gate_polarity = gate.polarity ^ negation; /* bit i gives the phase used for leaf i */
+        double worst_arrival = 0.0f;
+
+        auto ctr = 0u;
+        for ( auto l : *cut )
+        {
+          double arrival_pin = node_match[l].arrival[( gate_polarity >> ctr ) & 1] + gate.tdelay[ctr];
+          worst_arrival = std::max( worst_arrival, arrival_pin );
+          ++ctr;
+        }
+
+        if ( worst_arrival > node_data.required[phase] + epsilon || worst_arrival >= std::numeric_limits<float>::max() )
+          continue;
+
+        node_data.phase[phase] = gate_polarity; /* candidate written to the node before cut_measure_mffc runs */
+        node_data.area[phase] = gate.area;
+        float area_exact = cut_measure_mffc<SwitchActivity>( *cut, n, phase );
+
+        if ( compare_map<true>( worst_arrival, best_arrival, area_exact, best_exact_area, cut->size(), best_size ) )
+        {
+          best_arrival = worst_arrival;
+          best_exact_area = area_exact;
+          best_area = gate.area;
+          best_size = cut->size();
+          best_cut = cut_index;
+          best_phase = gate_polarity;
+          best_supergate = &gate;
+        }
+      }
+
+      ++cut_index;
+    }
+
+    node_data.flows[phase] = best_exact_area; /* commit the selection, overwriting the candidate data set in the loop */
+    node_data.arrival[phase] = best_arrival;
+    node_data.area[phase] = best_area;
+    node_data.best_cut[phase] = best_cut;
+    node_data.phase[phase] = best_phase;
+    node_data.best_supergate[phase] = best_supergate;
+
+    if ( !node_data.same_match && node_data.map_refs[phase] ) /* put the selected cut back into the cover */
+    {
+      best_exact_area = cut_ref<SwitchActivity>( cuts[index][best_cut], n, phase ); /* result is not read afterwards */
+    }
+  }
+
+  template<bool DO_AREA, bool ELA> /* DO_AREA: decide on required times; ELA: also maintain cut references */
+  void match_drop_phase( node<Ntk> const& n, float required_margin_factor ) /* margin scales lib_inv_delay in the DO_AREA check */
+  {
+    auto index = ntk.node_to_index( n );
+    auto& node_data = node_match[index];
+
+    /* arrival of each phase when obtained from the opposite phase match through an inverter */
+    double worst_arrival_npos = node_data.arrival[1] + lib_inv_delay;
+    double worst_arrival_nneg = node_data.arrival[0] + lib_inv_delay;
+    bool use_zero = false;
+    bool use_one = false;
+
+    /* only one phase is matched: it necessarily implements both phases */
+    if ( node_data.best_supergate[0] == nullptr )
+    {
+      set_match_complemented_phase( index, 1, worst_arrival_npos );
+      if constexpr ( ELA )
+      {
+        if ( node_data.map_refs[2] )
+          cut_ref<false>( cuts[index][node_data.best_cut[1]], n, 1 );
+      }
+      return;
+    }
+    else if ( node_data.best_supergate[1] == nullptr )
+    {
+      set_match_complemented_phase( index, 0, worst_arrival_nneg );
+      if constexpr ( ELA )
+      {
+        if ( node_data.map_refs[2] )
+          cut_ref<false>( cuts[index][node_data.best_cut[0]], n, 0 );
+      }
+      return;
+    }
+
+    /* try to use only one match to cover both phases */
+    if constexpr ( !DO_AREA )
+    {
+      /* a phase is a candidate if deriving the other phase from it is not slower than the other phase's own match */
+      if ( worst_arrival_npos < node_data.arrival[0] + epsilon )
+      {
+        use_one = true;
+      }
+      if ( worst_arrival_nneg < node_data.arrival[1] + epsilon )
+      {
+        use_zero = true;
+      }
+    }
+    else
+    {
+      /* a phase is a candidate if the derived phase meets its required time minus the scaled inverter margin */
+      use_zero = worst_arrival_nneg < ( node_data.required[1] + epsilon - required_margin_factor * lib_inv_delay );
+      use_one = worst_arrival_npos < ( node_data.required[0] + epsilon - required_margin_factor * lib_inv_delay );
+    }
+
+    /* condition on not used phases, evaluate a substitution during exact area recovery */
+    if constexpr ( ELA )
+    {
+      if ( iteration != 0 )
+      {
+        if ( node_data.map_refs[0] == 0 || node_data.map_refs[1] == 0 )
+        {
+          /* start from the referenced phase; when neither is referenced the else branch picks phase 0 */
+          auto phase = 0;
+          auto nphase = 0;
+          if ( node_data.map_refs[0] == 0 )
+          {
+            phase = 1;
+            use_one = true;
+            use_zero = false;
+          }
+          else
+          {
+            nphase = 1;
+            use_one = false;
+            use_zero = true;
+          }
+          /* switch to the not used match only if compare_map prefers it and the required time still holds */
+          if ( node_data.arrival[nphase] + lib_inv_delay < node_data.required[phase] + epsilon )
+          {
+            auto size_phase = cuts[index][node_data.best_cut[phase]].size();
+            auto size_nphase = cuts[index][node_data.best_cut[nphase]].size();
+
+            if ( compare_map<DO_AREA>( node_data.arrival[nphase] + lib_inv_delay, node_data.arrival[phase], node_data.flows[nphase] + lib_inv_area, node_data.flows[phase], size_nphase, size_phase ) )
+            {
+              /* invert the choice */
+              use_zero = !use_zero;
+              use_one = !use_one;
+            }
+          }
+        }
+      }
+    }
+
+    if ( ( !use_zero && !use_one ) )
+    {
+      /* neither phase can serve both: keep two separate matches when duplication is allowed */
+      if ( ps.allow_node_duplication )
+      {
+        node_data.flows[0] = node_data.flows[0] / node_data.est_refs[0];
+        node_data.flows[1] = node_data.flows[1] / node_data.est_refs[1];
+        node_data.same_match = false;
+        return;
+      }
+
+      /* if node duplication is not allowed, pick one phase based on delay */
+      auto size_zero = cuts[index][node_data.best_cut[0]].size();
+      auto size_one = cuts[index][node_data.best_cut[1]].size();
+      if ( compare_map<false>( worst_arrival_npos, worst_arrival_nneg, node_data.flows[1], node_data.flows[0], size_one, size_zero ) )
+        use_zero = true; /* NOTE(review): phase-1 candidate is passed first yet true selects phase 0, unlike the tiebreaker below - confirm */
+      else
+        use_one = true;
+    }
+
+    /* both phases qualify: compare_map breaks the tie, keeping phase 0 when it returns true */
+    if ( use_zero && use_one )
+    {
+      auto size_zero = cuts[index][node_data.best_cut[0]].size();
+      auto size_one = cuts[index][node_data.best_cut[1]].size();
+      if ( compare_map<DO_AREA>( worst_arrival_nneg, worst_arrival_npos, node_data.flows[0], node_data.flows[1], size_zero, size_one ) )
+        use_one = false;
+      else
+        use_zero = false;
+    }
+
+    if ( use_zero ) /* phase 0 match implements both phases */
+    {
+      if constexpr ( ELA )
+      {
+        /* set cut references; same_match still holds the value from before this call */
+        if ( !node_data.same_match )
+        {
+          /* dereference the negative phase cut if in use */
+          if ( node_data.map_refs[1] > 0 )
+            cut_deref<false>( cuts[index][node_data.best_cut[1]], n, 1 );
+          /* reference the positive cut if not in use before */
+          if ( node_data.map_refs[0] == 0 && node_data.map_refs[2] )
+            cut_ref<false>( cuts[index][node_data.best_cut[0]], n, 0 );
+        }
+        else if ( node_data.map_refs[2] )
+          cut_ref<false>( cuts[index][node_data.best_cut[0]], n, 0 );
+      }
+      set_match_complemented_phase( index, 0, worst_arrival_nneg );
+    }
+    else /* phase 1 match implements both phases */
+    {
+      if constexpr ( ELA )
+      {
+        /* set cut references; same_match still holds the value from before this call */
+        if ( !node_data.same_match )
+        {
+          /* dereference the positive phase cut if in use */
+          if ( node_data.map_refs[0] > 0 )
+            cut_deref<false>( cuts[index][node_data.best_cut[0]], n, 0 );
+          /* reference the negative cut if not in use before */
+          if ( node_data.map_refs[1] == 0 && node_data.map_refs[2] )
+            cut_ref<false>( cuts[index][node_data.best_cut[1]], n, 1 );
+        }
+        else if ( node_data.map_refs[2] )
+          cut_ref<false>( cuts[index][node_data.best_cut[1]], n, 1 );
+      }
+      set_match_complemented_phase( index, 1, worst_arrival_npos );
+    }
+  }
+
+  inline void set_match_complemented_phase( uint32_t index, uint8_t phase, double worst_arrival_n )
+  {
+    auto& match = node_match[index];
+    auto const other = phase ^ 1;
+    match.same_match = true; /* `phase` keeps its match; the opposite phase is derived from it through an inverter */
+    match.flows[phase] = match.flows[phase] / match.est_refs[2];
+    match.flows[other] = match.flows[phase] + lib_inv_area;
+    match.arrival[other] = worst_arrival_n;
+    match.area[other] = match.area[phase];
+    match.phase[other] = match.phase[phase];
+    match.best_cut[other] = match.best_cut[phase];
+    match.best_supergate[other] = nullptr;
+  }
+
+  void reindex_multioutput_data()
+  {
+    /* move each tuple id from the position storing it to the node of the tuple's first entry */
+    if ( ps.map_multioutput )
+    {
+      for ( auto pos = ntk.num_pis(); pos < topo_order.size(); ++pos )
+      {
+        uint32_t const tuple_id = node_tuple_match[pos];
+        if ( tuple_id < UINT32_MAX - 1 )
+        {
+          auto const first_output = multi_node_match[tuple_id][0][0].node_index;
+          node_tuple_match[pos] = UINT32_MAX - 1; /* arbitrary value to skip the required time propagation */
+          node_tuple_match[first_output] = tuple_id;
+        }
+      }
+    }
+  }
+
+  bool initialize_box( node<Ntk> const& n )
+  {
+    /* add the unit cut of the box and mark both phases as sharing one match */
+    uint32_t const box_index = ntk.node_to_index( n );
+    auto& box = node_match[box_index];
+    add_unit_cut( box_index );
+    box.same_match = true;
+
+    if constexpr ( has_has_binding_v<Ntk> )
+    {
+      /* white box: arrival, area and flow come from the gate binding; returns false */
+      propagate_data_forward_white_box( n );
+      return false;
+    }
+    else
+    {
+      /* black box: no area or flow, zero arrival, inverter delay on phase 1; returns true */
+      box.flows[0] = box.flows[1] = 0;
+      box.area[0] = box.area[1] = 0;
+      box.arrival[0] = 0.0f;
+      box.arrival[1] = lib_inv_delay;
+      return true;
+    }
+  }
+
+  void propagate_data_forward_white_box( node<Ntk> const& n )
+  {
+    auto& box = node_match[ntk.node_to_index( n )];
+    auto const& bound_gate = ntk.get_binding( n );
+
+    /* latest fanin arrival plus the larger of the pin's rise and fall block delays */
+    double latest = 0;
+    ntk.foreach_fanin( n, [&]( auto const& fanin, auto pin ) {
+      auto const& fanin_data = node_match[ntk.node_to_index( ntk.get_node( fanin ) )];
+      double const pin_delay = std::max( bound_gate.pins[pin].rise_block_delay, bound_gate.pins[pin].fall_block_delay );
+      double const fanin_arrival = fanin_data.arrival[ntk.is_complemented( fanin ) ? 1 : 0];
+      latest = std::max( latest, fanin_arrival + pin_delay );
+    } );
+
+    /* phase 0 is the bound gate itself; phase 1 adds the inverter delay and area */
+    box.arrival[0] = latest;
+    box.arrival[1] = latest + lib_inv_delay;
+    box.area[1] = bound_gate.area;
+    box.area[0] = box.area[1];
+    box.flows[0] = box.area[0] / box.est_refs[2];
+    box.flows[1] = box.flows[0] + lib_inv_area;
+  }
+
+  void propagate_data_backward_white_box( node<Ntk> const& n )
+  {
+    uint32_t const box_index = ntk.node_to_index( n );
+    auto& box = node_match[box_index];
+    auto const& bound_gate = ntk.get_binding( n );
+
+    /* the box is expected to be referenced in at least one phase */
+    assert( box.map_refs[0] || box.map_refs[1] );
+
+    /* a used negative phase constrains phase 0 across the output inverter */
+    if ( box.map_refs[1] > 0 )
+      box.required[0] = std::min( box.required[0], box.required[1] - lib_inv_delay );
+
+    /* each referenced phase must arrive within its required time (epsilon tolerance) */
+    assert( !box.map_refs[0] || box.arrival[0] < box.required[0] + epsilon );
+    assert( !box.map_refs[1] || box.arrival[1] < box.required[1] + epsilon );
+
+    /* tighten every fanin by the phase 0 requirement minus the larger pin block delay */
+    ntk.foreach_fanin( n, [&]( auto const& fanin, auto pin ) {
+      auto& fanin_data = node_match[ntk.node_to_index( ntk.get_node( fanin ) )];
+      uint8_t const fanin_phase = ntk.is_complemented( fanin ) ? 1 : 0;
+      double const pin_delay = std::max( bound_gate.pins[pin].rise_block_delay, bound_gate.pins[pin].fall_block_delay );
+      auto& fanin_required = fanin_data.required[fanin_phase];
+      fanin_required = std::min( fanin_required, box.required[0] - pin_delay );
+    } );
+  }
+
+  void match_constants( uint32_t index )
+  {
+    auto& constant_data = node_match[index];
+
+    /* look up the gate lists for the all-zero function and for its complement */
+    kitty::static_truth_table<6> zero_tt;
+    auto const gates_zero = library.get_supergates( zero_tt );
+    auto const gates_one = library.get_supergates( ~zero_tt );
+    bool const has_zero = gates_zero != nullptr;
+    bool const has_one = gates_one != nullptr;
+
+    /* nothing is set when the library offers neither constant */
+    if ( !has_zero && !has_one )
+      return;
+
+    /* take arrival and area of a phase directly from a library gate */
+    auto const bind_gate = [&]( uint8_t phase, supergate<NInputs> const* gate ) {
+      constant_data.best_supergate[phase] = gate;
+      constant_data.arrival[phase] = gate->tdelay[0];
+      constant_data.area[phase] = gate->area;
+      constant_data.phase[phase] = 0;
+    };
+
+    /* obtain a phase from the opposite one, adding the inverter delay and area */
+    auto const derive_phase = [&]( uint8_t phase ) {
+      constant_data.same_match = true;
+      constant_data.arrival[phase] = constant_data.arrival[phase ^ 1] + lib_inv_delay;
+      constant_data.area[phase] = constant_data.area[phase ^ 1] + lib_inv_area;
+      constant_data.phase[phase] = 1;
+    };
+
+    /* phase 0 is bound first when available; otherwise it is derived last, once phase 1 is set */
+    if ( has_zero )
+      bind_gate( 0, &( ( *gates_zero )[0] ) );
+
+    if ( has_one )
+      bind_gate( 1, &( ( *gates_one )[0] ) );
+    else
+      derive_phase( 1 );
+
+    if ( !has_zero )
+      derive_phase( 0 );
+  }
+
+  template<bool DO_AREA>
+  bool match_multioutput( node<Ntk> const& n )
+  {
+    /* extract outputs tuple */
+    uint32_t index = ntk.node_to_index( n );
+    multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index]][0];
+
+    /* get the cut */
+    auto const& cut0 = cuts[tuple_data[0].node_index][tuple_data[0].cut_index];
+
+    /* local values storage */
+    std::array<double, max_multioutput_output_size> arrival;
+    std::array<float, max_multioutput_output_size> area_flow;
+    std::array<float, max_multioutput_output_size> area;
+    std::array<uint8_t, max_multioutput_output_size> phase;
+    std::array<uint16_t, max_multioutput_output_size> pin_phase;
+    std::array<double, max_multioutput_output_size> est_refs;
+    std::array<uint32_t, max_multioutput_output_size> cut_index;
+    bool mapped_multioutput = false;
+
+    uint8_t iteration_phase = cut0->supergates[0] == nullptr ? 1 : 0;
+
+    /* iterate for each possible match */
+    for ( auto i = 0; i < cut0->supergates[iteration_phase]->size(); ++i )
+    {
+      /* store local validity and comparison info */
+      bool valid = true;
+      bool is_best = true;
+      bool respects_required = true;
+      double old_flow_sum = 0;
+
+      /* iterate for each output of the multi-output gate */
+      for ( auto j = 0; j < max_multioutput_output_size; ++j )
+      {
+        uint32_t node_index = tuple_data[j].node_index;
+        cut_index[j] = tuple_data[j].cut_index;
+        auto& node_data = node_match[node_index];
+        auto const& cut = cuts[node_index][cut_index[j]];
+        uint8_t phase_inverted = cut->supergates[0] == nullptr ? 1 : 0;
+        supergate<NInputs> const& gate = ( *( cut->supergates[phase_inverted] ) )[i];
+
+        /* protection on complicated duplicated nodes to remap to multioutput */
+        if ( !node_data.same_match )
+          return false;
+
+        /* get the output phase */
+        pin_phase[j] = gate.polarity;
+        phase[j] = ( gate.polarity >> NInputs ) ^ phase_inverted;
+        uint8_t old_phase = node_data.phase[phase[j]];
+
+        /* compute arrival */
+        arrival[j] = 0.0;
+        auto ctr = 0u;
+        for ( auto l : cut )
+        {
+          double arrival_pin = node_match[l].arrival[( gate.polarity >> ctr ) & 1] + gate.tdelay[ctr];
+          arrival[j] = std::max( arrival[j], arrival_pin );
+          ++ctr;
+        }
+
+        /* check required time: same_match is true */
+        if constexpr ( DO_AREA )
+        {
+          if ( arrival[j] > node_data.required[phase[j]] + epsilon )
+          {
+            valid = false;
+            break;
+          }
+          if ( arrival[j] + lib_inv_delay > node_data.required[phase[j] ^ 1] + epsilon )
+          {
+            valid = false;
+            break;
+          }
+        }
+
+        /* check required time of the current solution */
+        if ( node_data.arrival[phase[j]] > node_data.required[phase[j]] )
+          respects_required = false;
+        if ( node_data.same_match && node_data.arrival[phase[j] ^ 1] > node_data.required[phase[j] ^ 1] )
+          respects_required = false;
+
+        /* compute area flow */
+        old_flow_sum += node_data.flows[phase[j]];
+        node_data.phase[phase[j]] = gate.polarity;
+        area[j] = gate.area;
+        area_flow[j] = gate.area + cut_leaves_flow( cut, n, phase[j] );
+        node_data.phase[phase[j]] = old_phase;
+
+        /* local evaluation for delay (area flow improvement is approximated) */
+        if constexpr ( !DO_AREA )
+        {
+          /* recompute local area flow of previous matches */
+          double mapped_flow = node_data.flows[phase[j]];
+
+          if ( node_data.multioutput_match[phase[j]] )
+          {
+            /* recompute estimation for multi-output gate */
+            float k_est = 0;
+            for ( auto k = 0; k < max_multioutput_output_size; ++k )
+            {
+              uint32_t index_k = tuple_data[k].node_index;
+              k_est += node_match[index_k].est_refs[2];
+            }
+            mapped_flow *= k_est;
+          }
+          else
+          {
+            mapped_flow *= node_data.est_refs[2];
+          }
+
+          auto const& mapped_cut = cuts[node_index][node_data.best_cut[phase[j]]];
+          if ( !compare_map<DO_AREA>( arrival[j], node_data.arrival[phase[j]], area_flow[j], mapped_flow, cut.size(), mapped_cut.size() ) )
+          {
+            is_best = false;
+            break;
+          }
+        }
+
+        /* quit exit to not unmap phases, TODO: implement it well */
+        /* current version may lead to delay increase */
+        est_refs[j] = node_data.est_refs[2];
+      }
+
+      /* not better than individual gates */
+      if ( !valid )
+        continue;
+
+      if constexpr ( !DO_AREA )
+      {
+        if ( !is_best )
+          continue;
+      }
+
+      /* combine evaluation for precise area flow estimantion */
+      double flow_sum = 0;
+      double combined_est_refs = 0;
+      for ( auto j = 0; j < max_multioutput_output_size; ++j )
+      {
+        flow_sum += area_flow[j];
+        combined_est_refs += est_refs[j];
+      }
+      flow_sum = flow_sum / combined_est_refs;
+
+      /* not better than individual gates */
+      if ( respects_required && ( flow_sum > old_flow_sum + epsilon ) )
+        continue;
+
+      mapped_multioutput = true;
+
+      /* commit multi-output gate */
+      for ( uint32_t j = 0; j < max_multioutput_output_size; ++j )
+      {
+        uint32_t node_index = tuple_data[j].node_index;
+        auto& node_data = node_match[node_index];
+        auto const& cut = cuts[node_index][cut_index[j]];
+        uint8_t phase_inverted = cut->supergates[0] == nullptr ? 1 : 0;
+        supergate<NInputs> const& gate = ( *( cut->supergates[phase_inverted] ) )[i];
+
+        uint8_t mapped_phase = phase[j];
+        node_data.multioutput_match[mapped_phase] = true;
+
+        node_data.best_supergate[mapped_phase] = &gate;
+        node_data.best_cut[mapped_phase] = cut_index[j];
+        node_data.phase[mapped_phase] = pin_phase[j];
+        node_data.arrival[mapped_phase] = arrival[j];
+        node_data.area[mapped_phase] = area[j]; /* partial area contribution */
+        node_data.flows[mapped_phase] = flow_sum;
+
+        assert( node_data.arrival[mapped_phase] < node_data.required[mapped_phase] + epsilon );
+
+        /* select opposite phase */
+        mapped_phase ^= 1;
+        node_data.multioutput_match[mapped_phase] = true;
+        node_data.best_supergate[mapped_phase] = nullptr;
+        node_data.best_cut[mapped_phase] = cut_index[j];
+        node_data.phase[mapped_phase] = pin_phase[j];
+        node_data.arrival[mapped_phase] = arrival[j] + lib_inv_delay;
+        node_data.area[mapped_phase] = area[j];                  /* partial area contribution */
+        node_data.flows[mapped_phase] = flow_sum + lib_inv_area; /* TODO: check quality */
+
+        assert( node_data.arrival[mapped_phase] < node_data.required[mapped_phase] + epsilon );
+      }
+    }
+
+    return mapped_multioutput;
+  }
+
+  /* Exact-area evaluation of the multi-output candidate attached to node `n`.
+   *
+   * The outputs of the first registered tuple are dereferenced from the cover
+   * to measure the exact area of their current implementation, the
+   * multi-output match is attempted through `match_multioutput_exact_core`,
+   * and the (possibly updated) outputs are referenced again.
+   *
+   * Returns false without touching the cover if an output does not have
+   * `same_match` set; otherwise returns the result of the core routine, or
+   * false when it is skipped because `last_round` is set and an output is
+   * unreferenced.
+   */
+  template<bool SwitchActivity>
+  bool match_multioutput_exact( node<Ntk> const& n, bool last_round )
+  {
+    /* first tuple of outputs registered for this node */
+    uint32_t const root_index = ntk.node_to_index( n );
+    multi_match_t const& tuple_data = multi_node_match[node_tuple_match[root_index]][0];
+
+    /* protection on complicated duplicated nodes to remap to multioutput */
+    for ( uint32_t j = 0; j < max_multioutput_output_size; ++j )
+    {
+      if ( !node_match[tuple_data[j].node_index].same_match )
+        return false;
+    }
+
+    /* exact area of the current implementation of each output */
+    std::array<float, max_multioutput_output_size> best_exact_area;
+
+    /* dereference the outputs used in the cover (reverse topo order) */
+    for ( int j = max_multioutput_output_size - 1; j >= 0; --j )
+    {
+      uint32_t const node_index = tuple_data[j].node_index;
+      auto& node_data = node_match[node_index];
+      uint8_t const impl_phase = node_data.best_supergate[0] != nullptr ? 0 : 1;
+
+      if ( node_data.map_refs[2] == 0 )
+      {
+        /* output not referenced: rely on the stored flow scaled by the estimated references */
+        best_exact_area[j] = node_data.flows[0] * node_data.est_refs[2];
+        continue;
+      }
+
+      /* match is always single output here */
+      auto const& cut = cuts[node_index][node_data.best_cut[0]];
+      best_exact_area[j] = cut_deref<SwitchActivity>( cut, ntk.index_to_node( node_index ), impl_phase );
+
+      /* the implemented phase is not referenced: add the inverter area */
+      if ( node_data.map_refs[impl_phase] == 0 )
+        best_exact_area[j] += lib_inv_area;
+    }
+
+    /* in the last round, do not use the multi-output gate if one output is not referenced */
+    bool attempt_mapping = true;
+    if ( last_round )
+    {
+      for ( uint32_t j = 0; j < max_multioutput_output_size; ++j )
+      {
+        if ( node_match[tuple_data[j].node_index].map_refs[2] == 0 )
+        {
+          attempt_mapping = false;
+          break;
+        }
+      }
+    }
+
+    /* perform mapping */
+    bool mapped_multioutput = false;
+    if ( attempt_mapping )
+      mapped_multioutput = match_multioutput_exact_core<SwitchActivity>( tuple_data, best_exact_area );
+
+    /* reference the leaves of the outputs used in the cover (topo order) */
+    for ( uint32_t j = 0; j < max_multioutput_output_size; ++j )
+    {
+      uint32_t const node_index = tuple_data[j].node_index;
+      auto const& node_data = node_match[node_index];
+
+      if ( node_data.map_refs[2] == 0 )
+        continue;
+
+      uint8_t const impl_phase = node_data.best_supergate[0] != nullptr ? 0 : 1;
+      auto const& best_cut = cuts[node_index][node_data.best_cut[impl_phase]];
+      cut_ref<SwitchActivity>( best_cut, ntk.index_to_node( node_index ), impl_phase );
+    }
+
+    return mapped_multioutput;
+  }
+
+  /* Core of the exact-area multi-output matching.
+   *
+   * For every gate index `i` available on the representative cut (the cut of
+   * the first tuple entry), the routine evaluates the multi-output gate made
+   * of the i-th supergate of each output cut:
+   *  - arrival times are computed from the leaves and checked against the
+   *    required times of both phases of each output;
+   *  - the exact area is measured with `cut_measure_mffc` for the first
+   *    processed output only (leaves are shared) and taken as the plain gate
+   *    area for the others;
+   *  - the candidate is committed when its total exact area is smaller than
+   *    the total in `best_exact_area`, or when the current solution does not
+   *    respect the required times.
+   *
+   * `tuple_data`      outputs (node and cut indexes) of the multi-output candidate.
+   * `best_exact_area` in: exact area of the current implementation per output;
+   *                   out: overwritten with the committed candidate's areas.
+   *
+   * Returns true if at least one candidate has been committed.
+   */
+  template<bool SwitchActivity>
+  inline bool match_multioutput_exact_core( multi_match_t const& tuple_data, std::array<float, max_multioutput_output_size>& best_exact_area )
+  {
+    /* get the cut representative */
+    auto const& cut0 = cuts[tuple_data[0].node_index][tuple_data[0].cut_index];
+
+    /* local values storage */
+    std::array<double, max_multioutput_output_size> arrival;
+    std::array<float, max_multioutput_output_size> area_exact;
+    std::array<float, max_multioutput_output_size> area;
+    std::array<uint8_t, max_multioutput_output_size> phase;
+    std::array<uint16_t, max_multioutput_output_size> pin_phase;
+    std::array<uint32_t, max_multioutput_output_size> cut_index;
+
+    uint8_t iteration_phase = cut0->supergates[0] == nullptr ? 1 : 0;
+
+    bool mapped_multioutput = false;
+
+    /* iterate for each possible match */
+    for ( uint32_t i = 0; i < cut0->supergates[iteration_phase]->size(); ++i )
+    {
+      /* store local validity and comparison info */
+      bool valid = true;
+      bool respects_required = true;
+      uint32_t it_counter = 0;
+
+      /* iterate for each output of the multi-output gate (reverse topo order) */
+      for ( int j = max_multioutput_output_size - 1; j >= 0; --j )
+      {
+        uint32_t node_index = tuple_data[j].node_index;
+        cut_index[j] = tuple_data[j].cut_index;
+        auto& node_data = node_match[node_index];
+        auto const& cut = cuts[node_index][cut_index[j]];
+        uint8_t phase_inverted = cut->supergates[0] == nullptr ? 1 : 0;
+        supergate<NInputs> const& gate = ( *( cut->supergates[phase_inverted] ) )[i];
+        ++it_counter;
+
+        /* get the output phase and area */
+        pin_phase[j] = gate.polarity;
+        phase[j] = ( gate.polarity >> NInputs ) ^ phase_inverted;
+        area[j] = gate.area;
+
+        /* compute arrival */
+        arrival[j] = 0.0;
+        auto ctr = 0u;
+        for ( auto l : cut )
+        {
+          double arrival_pin = node_match[l].arrival[( gate.polarity >> ctr ) & 1] + gate.tdelay[ctr];
+          arrival[j] = std::max( arrival[j], arrival_pin );
+          ++ctr;
+        }
+
+        /* check required time */
+        if ( arrival[j] > node_data.required[phase[j]] + epsilon )
+        {
+          valid = false;
+          break;
+        }
+        if ( arrival[j] + lib_inv_delay > node_data.required[phase[j] ^ 1] + epsilon )
+        {
+          valid = false;
+          break;
+        }
+
+        /* check required time of current solution */
+        if ( node_data.arrival[phase[j]] > node_data.required[phase[j]] )
+          respects_required = false;
+        if ( node_data.arrival[phase[j] ^ 1] > node_data.required[phase[j] ^ 1] )
+          respects_required = false;
+
+        /* compute exact area for match: needed only for the first node (leaves are shared) */
+        if ( it_counter == 1 )
+        {
+          /* temporarily install the candidate pin phases and area so that the measure uses them */
+          auto old_phase = node_data.phase[phase[j]];
+          auto old_area = node_data.area[phase[j]];
+          node_data.phase[phase[j]] = pin_phase[j];
+          node_data.area[phase[j]] = area[j];
+          area_exact[j] = cut_measure_mffc<SwitchActivity>( cut, ntk.index_to_node( node_index ), phase[j] );
+          node_data.phase[phase[j]] = old_phase;
+          node_data.area[phase[j]] = old_area;
+        }
+        else
+        {
+          area_exact[j] = area[j];
+        }
+
+        /* Add output inverter cost if mapping a non referenced phase */
+        if ( node_data.map_refs[phase[j]] == 0 && node_data.map_refs[phase[j] ^ 1] > 0 )
+        {
+          area_exact[j] += lib_inv_area;
+        }
+      }
+
+      /* timing violation: the output loop stopped early, so the remaining
+       * entries of the local arrays are not set and must not be read */
+      if ( !valid )
+      {
+        continue;
+      }
+
+      /* check quality: TODO add output inverter in the cost if necessary */
+      float best_exact_area_total = 0;
+      float area_exact_total = 0;
+      for ( uint32_t j = 0; j < max_multioutput_output_size; ++j )
+      {
+        best_exact_area_total += best_exact_area[j];
+        area_exact_total += area_exact[j];
+      }
+
+      /* not better than individual gates */
+      if ( area_exact_total > best_exact_area_total - epsilon && respects_required )
+      {
+        continue;
+      }
+
+      mapped_multioutput = true;
+
+      /* commit multi-output gate (topo order) */
+      for ( uint32_t j = 0; j < max_multioutput_output_size; ++j )
+      {
+        uint32_t node_index = tuple_data[j].node_index;
+        auto& node_data = node_match[node_index];
+        auto const& cut = cuts[node_index][cut_index[j]];
+        uint8_t phase_inverted = cut->supergates[0] == nullptr ? 1 : 0;
+        supergate<NInputs> const& gate = ( *( cut->supergates[phase_inverted] ) )[i];
+
+        uint8_t mapped_phase = phase[j];
+        best_exact_area[j] = area_exact[j];
+
+        /* NOTE(review): area_exact[j] already received lib_inv_area under this
+         * same condition in the evaluation loop; confirm that adding it a
+         * second time to the reference area is intended */
+        if ( node_data.map_refs[phase[j]] == 0 && node_data.map_refs[phase[j] ^ 1] > 0 )
+        {
+          best_exact_area[j] += lib_inv_area;
+        }
+
+        /* write data */
+        node_data.multioutput_match[mapped_phase] = true;
+        node_data.best_supergate[mapped_phase] = &gate;
+        node_data.best_cut[mapped_phase] = cut_index[j];
+        node_data.phase[mapped_phase] = pin_phase[j];
+        node_data.arrival[mapped_phase] = arrival[j];
+        node_data.area[mapped_phase] = area[j]; /* partial area contribution */
+
+        node_data.flows[mapped_phase] = area_exact[j] / node_data.est_refs[2]; /* partial exact area contribution */
+        /* select opposite phase */
+        mapped_phase ^= 1;
+        node_data.multioutput_match[mapped_phase] = true;
+        node_data.best_supergate[mapped_phase] = nullptr;
+        node_data.best_cut[mapped_phase] = cut_index[j];
+        node_data.phase[mapped_phase] = pin_phase[j];
+        node_data.arrival[mapped_phase] = arrival[j] + lib_inv_delay;
+        node_data.area[mapped_phase] = area[j]; /* partial area contribution */
+        node_data.flows[mapped_phase] = area_exact[j] / node_data.est_refs[2];
+
+        assert( node_data.arrival[mapped_phase] < node_data.required[mapped_phase] + epsilon );
+      }
+    }
+
+    return mapped_multioutput;
+  }
+
+  /* Triggers the update of the nodes located between the outputs of the
+   * multi-output candidate attached to `n`.
+   *
+   * The first tuple entries (all but the last one) flagged `in_tfi` seed a
+   * 64-bit signature with the bit `node_index & 0x3f`; if none is flagged
+   * the function returns immediately. Otherwise the fanins of `n` are
+   * visited recursively by `multi_node_update_rec`, bounded from below by
+   * the index following the last flagged entry.
+   */
+  template<bool DO_AREA>
+  void multi_node_update( node<Ntk> const& n )
+  {
+    multi_match_t const& tuple_data = multi_node_match[node_tuple_match[ntk.node_to_index( n )]][0];
+    uint64_t signature = 0;
+
+    /* check if a node is in TFI: there is a path of length > 1 */
+    bool in_tfi = false;
+    node<Ntk> min_node = n;
+    for ( auto j = 0; j < max_multioutput_output_size - 1; ++j )
+    {
+      if ( tuple_data[j].in_tfi )
+      {
+        min_node = ntk.index_to_node( tuple_data[j].node_index );
+        in_tfi = true;
+        signature |= UINT64_C( 1 ) << ( tuple_data[j].node_index & 0x3f );
+      }
+    }
+
+    if ( !in_tfi )
+      return;
+
+    /* recompute data in between: should I mark the leaves? (not necessary under some assumptions) */
+    ntk.incr_trav_id();
+    ntk.foreach_fanin( n, [&]( auto const& f ) {
+      /* TODO: this recursion works as it is for a maximum multioutput value of 2 */
+      multi_node_update_rec<DO_AREA>( ntk.get_node( f ), min_node + 1, signature );
+    } );
+  }
+
+  /* Recursive step of `multi_node_update`.
+   *
+   * Visits the transitive fanin of `n` down to `min_index` (fanins first) and
+   * re-matches every node whose selected cut signature intersects
+   * `signature`. Each re-evaluated node adds its own bit to `signature`.
+   */
+  template<bool DO_AREA>
+  void multi_node_update_rec( node<Ntk> const& n, uint32_t min_index, uint64_t& signature )
+  {
+    uint32_t const index = ntk.node_to_index( n );
+
+    /* stop below the lower bound and on nodes already visited in this traversal */
+    if ( index < min_index || ntk.visited( n ) == ntk.trav_id() )
+      return;
+
+    ntk.set_visited( n, ntk.trav_id() );
+    ntk.foreach_fanin( n, [&]( auto const& fanin ) {
+      multi_node_update_rec<DO_AREA>( ntk.get_node( fanin ), min_index, signature );
+    } );
+
+    /* the node needs an update only if one of its selected cuts uses an updated leaf;
+     * the second phase has its own cut only when the two phases do not share the match */
+    auto& node_data = node_match[index];
+    bool const uses_updated_leaf = multi_node_update_cut_check( index, signature, 0 ) ||
+                                   ( !node_data.same_match && multi_node_update_cut_check( index, signature, 1 ) );
+
+    if ( !uses_updated_leaf )
+      return;
+
+    signature |= UINT64_C( 1 ) << ( index & 0x3f );
+
+    /* avoid cycles by recomputing arrival times for multi-output gates or decomposing them */
+    if ( node_data.same_match && node_data.multioutput_match[0] )
+    {
+      propagate_arrival_node( n );
+
+      /* keep the multi-output gate if it still meets the required times */
+      bool const meets_required = node_data.arrival[0] < node_data.required[0] + epsilon &&
+                                  node_data.arrival[1] < node_data.required[1] + epsilon;
+      if ( meets_required )
+        return;
+    }
+
+    /* match both phases, positive first */
+    match_phase<DO_AREA>( n, 0u );
+    match_phase<DO_AREA>( n, 1u );
+
+    /* try to drop one phase */
+    match_drop_phase<DO_AREA, false>( n, 0 );
+
+    assert( node_data.arrival[0] < node_data.required[0] + epsilon );
+    assert( node_data.arrival[1] < node_data.required[1] + epsilon );
+  }
+
+  /* Exact-area counterpart of `multi_node_update`.
+   *
+   * The first tuple entries (all but the last one) flagged `in_tfi` seed a
+   * 64-bit signature with the bit `node_index & 0x3f`; if none is flagged
+   * the function returns immediately. Otherwise the fanins of `n` are
+   * visited recursively by `multi_node_update_exact_rec`, bounded from below
+   * by the index following the last flagged entry.
+   */
+  template<bool SwitchActivity>
+  void multi_node_update_exact( node<Ntk> const& n )
+  {
+    multi_match_t const& tuple_data = multi_node_match[node_tuple_match[ntk.node_to_index( n )]][0];
+    uint64_t signature = 0;
+
+    /* check if a node is in TFI: there is a path of length > 1 */
+    bool in_tfi = false;
+    node<Ntk> min_node = n;
+    for ( auto j = 0; j < max_multioutput_output_size - 1; ++j )
+    {
+      if ( tuple_data[j].in_tfi )
+      {
+        min_node = ntk.index_to_node( tuple_data[j].node_index );
+        in_tfi = true;
+        signature |= UINT64_C( 1 ) << ( tuple_data[j].node_index & 0x3f );
+      }
+    }
+
+    if ( !in_tfi )
+      return;
+
+    /* recompute data in between: should I mark the leaves? (not necessary under some assumptions) */
+    ntk.incr_trav_id();
+    ntk.foreach_fanin( n, [&]( auto const& f ) {
+      /* TODO: this recursion works as it is for a maximum multioutput value of 2 */
+      multi_node_update_exact_rec<SwitchActivity>( ntk.get_node( f ), min_node + 1, signature );
+    } );
+  }
+
+  /* Recursive step of `multi_node_update_exact`.
+   *
+   * Visits the transitive fanin of `n` down to `min_index` (fanins first) and
+   * re-matches with exact area every node whose selected cut signature
+   * intersects `signature`. Each re-evaluated node adds its own bit to
+   * `signature`.
+   */
+  template<bool SwitchActivity>
+  void multi_node_update_exact_rec( node<Ntk> const& n, uint32_t min_index, uint64_t& signature )
+  {
+    uint32_t const index = ntk.node_to_index( n );
+
+    /* stop below the lower bound and on nodes already visited in this traversal */
+    if ( index < min_index || ntk.visited( n ) == ntk.trav_id() )
+      return;
+
+    ntk.set_visited( n, ntk.trav_id() );
+    ntk.foreach_fanin( n, [&]( auto const& fanin ) {
+      multi_node_update_exact_rec<SwitchActivity>( ntk.get_node( fanin ), min_index, signature );
+    } );
+
+    /* the node needs an update only if one of its selected cuts uses an updated leaf;
+     * the second phase has its own cut only when the two phases do not share the match */
+    auto& node_data = node_match[index];
+    bool const uses_updated_leaf = multi_node_update_cut_check( index, signature, 0 ) ||
+                                   ( !node_data.same_match && multi_node_update_cut_check( index, signature, 1 ) );
+
+    if ( !uses_updated_leaf )
+      return;
+
+    signature |= UINT64_C( 1 ) << ( index & 0x3f );
+
+    assert( !node_data.multioutput_match[0] );
+    assert( !node_data.multioutput_match[1] );
+
+    /* take the shared match out of the cover before matching again */
+    if ( node_data.same_match && node_data.map_refs[2] != 0 )
+    {
+      uint8_t const impl_phase = node_data.best_supergate[0] != nullptr ? 0 : 1;
+      auto const& selected_cut = cuts[index][node_data.best_cut[impl_phase]];
+      cut_deref<SwitchActivity>( selected_cut, n, impl_phase );
+    }
+
+    /* match both phases, positive first */
+    match_phase_exact<SwitchActivity>( n, 0u );
+    match_phase_exact<SwitchActivity>( n, 1u );
+
+    /* try to drop one phase */
+    match_drop_phase<true, true>( n, 0 );
+
+    assert( node_data.arrival[0] < std::numeric_limits<float>::max() );
+    assert( node_data.arrival[1] < std::numeric_limits<float>::max() );
+  }
+
+  /* Propagates the required times through every output of the multi-output
+   * candidate attached to `n`, from the last tuple entry to the first. */
+  inline void match_multioutput_propagate_required( node<Ntk> const& n )
+  {
+    /* first tuple of outputs registered for this node */
+    multi_match_t const& tuple_data = multi_node_match[node_tuple_match[ntk.node_to_index( n )]][0];
+
+    auto remaining = max_multioutput_output_size;
+    while ( remaining-- > 0 )
+    {
+      const auto output_index = tuple_data[remaining].node_index;
+      match_propagate_required( output_index );
+    }
+  }
+
+  /* Inserts the cuts of the multi-output candidates attached to `n` into the
+   * standard cut sets of their output nodes.
+   *
+   * For every candidate tuple and every output, the single-output variation
+   * of the cut is appended (if not already contained) and the multi-output
+   * cut, marked `ignore`, is appended right after; the set is then limited
+   * back to its previous size and the tuple records the position of the
+   * multi-output cut. Candidates that do not fit in a cut set (size reaching
+   * `max_cut_num`) are removed from the list.
+   *
+   * Returns true if at least one candidate is left; otherwise the node is
+   * detached from its tuple list (`node_tuple_match` set to UINT32_MAX).
+   */
+  template<bool DO_AREA>
+  bool match_multi_add_cuts( node<Ntk> const& n )
+  {
+    uint32_t index = ntk.node_to_index( n );
+    auto& matches = multi_node_match[node_tuple_match[index]];
+
+    /* get the cuts */
+    auto tuple_data_it = matches.begin();
+    while ( tuple_data_it != matches.end() )
+    {
+      multi_match_t& tuple_data = *tuple_data_it;
+      uint32_t cut_index = tuple_data[0].cut_index;
+      auto& cut_pair = multi_cut_set[cut_index];
+      bool remove_entry = false;
+
+      /* insert multi-output cuts into the standard cut set */
+      for ( uint32_t i = 0; i < max_multioutput_output_size; ++i )
+      {
+        uint64_t node_index = tuple_data[i].node_index;
+        auto& cut = cut_pair[i];
+        auto single_cut = cut_pair[i];
+
+        auto& rcuts = cuts[node_index];
+
+        /* not enough space in the data structure: abort */
+        if ( rcuts.size() == max_cut_num )
+        {
+          remove_entry = true;
+          break;
+        }
+
+        /* insert single cut variation if unique (for delay preservation) */
+        if ( !rcuts.is_contained( single_cut ) )
+        {
+          compute_cut_data<DO_AREA>( single_cut, ntk.index_to_node( node_index ) );
+          rcuts.append_cut( single_cut );
+
+          /* not enough space in the data structure: abort */
+          if ( rcuts.size() == max_cut_num )
+          {
+            rcuts.limit( rcuts.size() - 1 );
+            remove_entry = true;
+            break;
+          }
+        }
+
+        /* add multi-output cut */
+        uint32_t num_cuts_pre = rcuts.size();
+        cut->ignore = true;
+        rcuts.append_cut( cut );
+
+        uint32_t num_cuts_after = rcuts.size();
+        assert( num_cuts_after == num_cuts_pre + 1 );
+
+        /* restore the previous size of the set after appending the multi-output cut */
+        rcuts.limit( num_cuts_pre );
+
+        /* update tuple data */
+        tuple_data[i].cut_index = num_cuts_pre;
+      }
+
+      /* erase invalidates the iterator: continue from the one it returns */
+      if ( remove_entry )
+        tuple_data_it = matches.erase( tuple_data_it );
+      else
+        ++tuple_data_it;
+    }
+
+    /* matches do not fit in the data structure, remove multi-output option */
+    if ( matches.empty() )
+      node_tuple_match[index] = UINT32_MAX;
+
+    /* return if the insertion is (partially) successful */
+    return !matches.empty();
+  }
+
+  /* Tells whether the cut selected for `phase` of node `index` has a signature
+   * sharing at least one bit with `signature`. */
+  inline bool multi_node_update_cut_check( uint32_t index, uint64_t signature, uint8_t phase )
+  {
+    auto const& selected_cut = cuts[index][node_match[index].best_cut[phase]];
+    return ( signature & selected_cut.signature() ) != 0;
+  }
+
+  /* Removes multi-output gates that are only partially used in the cover.
+   *
+   * Nodes are scanned in reverse topological order. For each node mapped to
+   * a multi-output gate, the outputs of its first tuple are classified as
+   * used or unused from their reference counters; when the gate has both
+   * used and unused outputs, the referenced outputs are dereferenced and
+   * matched again with the exact-area single-output routines. The mapping
+   * references are finally recomputed with `set_mapping_refs<true>()`, whose
+   * result is returned. With `ps.verbose` a "Cleaning" line is appended to
+   * the round statistics.
+   */
+  bool remove_unused_multioutput()
+  {
+    /* TODO: update required times */
+    for ( auto it = topo_order.rbegin(); it != topo_order.rend(); ++it )
+    {
+      if ( ntk.is_constant( *it ) || ntk.is_pi( *it ) )
+        continue;
+
+      auto index = ntk.node_to_index( *it );
+
+      /* get used multi-output gates */
+      if ( node_tuple_match[index] == UINT32_MAX )
+        continue;
+
+      /* skip nodes that are not currently mapped to a multi-output gate */
+      if ( node_match[index].same_match && !node_match[index].multioutput_match[0] )
+        continue;
+
+      if ( !node_match[index].same_match && !( node_match[index].multioutput_match[0] || node_match[index].multioutput_match[1] ) )
+        continue;
+
+      /* check if mapped to multi-output with unused outputs */
+      multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index]][0];
+
+      bool used = false;
+      bool unused = false;
+      for ( auto j = 0; j < max_multioutput_output_size; ++j )
+      {
+        uint32_t node_index = tuple_data[j].node_index;
+        auto& node_data = node_match[node_index];
+
+        /* look at the phase that holds the multi-output gate */
+        if ( node_data.best_supergate[0] != nullptr && node_data.multioutput_match[0] )
+        {
+          if ( node_data.map_refs[0] > 0 || ( node_data.same_match && node_data.map_refs[2] > 0 ) )
+            used = true;
+          else
+            unused = true;
+        }
+        else if ( node_data.best_supergate[1] != nullptr && node_data.multioutput_match[1] )
+        {
+          if ( node_data.map_refs[1] > 0 || ( node_data.same_match && node_data.map_refs[2] > 0 ) )
+            used = true;
+          else
+            unused = true;
+        }
+      }
+
+      /* only gates with both used and unused outputs are remapped */
+      if ( !used || !unused )
+        continue;
+
+      /* remap connected outputs (reverse topo order)*/
+      for ( int j = max_multioutput_output_size - 1; j >= 0; --j )
+      {
+        uint32_t node_index = tuple_data[j].node_index;
+        auto& node_data = node_match[node_index];
+        auto const n = ntk.index_to_node( node_index );
+
+        /* unreferenced outputs are left untouched */
+        if ( node_data.map_refs[2] == 0 )
+          continue;
+
+        /* recursively deselect the best cut shared between
+         * the two phases if in use in the cover */
+        if ( node_data.same_match && node_data.map_refs[2] != 0 )
+        {
+          uint8_t use_phase = node_data.best_supergate[0] != nullptr ? 0 : 1;
+          auto const& best_cut = cuts[node_index][node_data.best_cut[use_phase]];
+          cut_deref<false>( best_cut, n, use_phase );
+        }
+
+        /* match positive phase */
+        match_phase_exact<false>( n, 0u );
+
+        /* match negative phase */
+        match_phase_exact<false>( n, 1u );
+
+        /* try to drop one phase */
+        match_drop_phase<true, true>( n, 0 );
+      }
+    }
+
+    /* keep the area before recomputing the references, for the gain report */
+    double area_old = area;
+    bool success = set_mapping_refs<true>();
+
+    /* round stats */
+    if ( ps.verbose )
+    {
+      float area_gain = float( ( area_old - area ) / area_old * 100 );
+      std::string stats = fmt::format( "[i] Cleaning : Delay = {:>12.2f}  Area = {:>12.2f}  Gain = {:>5.2f} %  Inverters = {:>5}  Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) );
+      st.round_stats.push_back( stats );
+    }
+
+    return success;
+  }
+#pragma endregion
+
+#pragma region Mapping utils
+  /* Sums the flows of the leaves of `cut`. Each leaf contributes the flow of
+   * the phase given by its bit in the pin-phase word stored for `phase` of
+   * node `n`. */
+  inline double cut_leaves_flow( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
+  {
+    auto const& node_data = node_match[ntk.node_to_index( n )];
+
+    double total_flow{ 0.0f };
+    uint8_t pin = 0u;
+    for ( auto leaf : cut )
+    {
+      /* phase required at this pin */
+      uint8_t const leaf_phase = ( node_data.phase[phase] >> pin ) & 1;
+      total_flow += node_match[leaf].flows[leaf_phase];
+      ++pin;
+    }
+
+    return total_flow;
+  }
+
+  /* References the leaves of `cut`, selected for `phase` of node `n`, and
+   * returns the cost added to the cover.
+   *
+   * The cost starts from the node itself (its switching activity when
+   * `SwitchActivity` is set, the area stored for `phase` otherwise) and
+   * accumulates the cost of every leaf that was not referenced before,
+   * recursively, plus the inverter cost of leaves newly needed in the phase
+   * they do not implement. The reference counters `map_refs` of the visited
+   * leaves are incremented.
+   */
+  template<bool SwitchActivity>
+  float cut_ref( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
+  {
+    auto const& node_data = node_match[ntk.node_to_index( n )];
+    float count;
+
+    if constexpr ( SwitchActivity )
+      count = switch_activity[ntk.node_to_index( n )];
+    else
+      count = node_data.area[phase];
+
+    /* don't touch box */
+    if constexpr ( has_is_dont_touch_v<Ntk> )
+    {
+      /* the leaves of a don't touch node are not referenced */
+      if ( ntk.is_dont_touch( n ) )
+      {
+        return count;
+      }
+    }
+
+    uint8_t ctr = 0;
+    for ( auto leaf : cut )
+    {
+      /* compute leaf phase using the current gate */
+      uint8_t leaf_phase = ( node_data.phase[phase] >> ctr++ ) & 1;
+
+      if ( ntk.is_constant( ntk.index_to_node( leaf ) ) )
+      {
+        continue;
+      }
+      else if ( ntk.is_pi( ntk.index_to_node( leaf ) ) )
+      {
+        /* reference PIs, add inverter cost for negative phase */
+        if ( leaf_phase == 1u )
+        {
+          /* the inverter is paid only by the first reference to the negative phase */
+          if ( node_match[leaf].map_refs[1]++ == 0u )
+          {
+            if constexpr ( SwitchActivity )
+              count += switch_activity[leaf];
+            else
+              count += lib_inv_area;
+          }
+          ++node_match[leaf].map_refs[2];
+        }
+        else
+        {
+          ++node_match[leaf].map_refs[0];
+          ++node_match[leaf].map_refs[2];
+        }
+        continue;
+      }
+
+      if ( node_match[leaf].same_match )
+      {
+        /* Add inverter area if not present yet and leaf node is implemented in the opposite phase */
+        if ( node_match[leaf].map_refs[leaf_phase]++ == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr )
+        {
+          if constexpr ( SwitchActivity )
+            count += switch_activity[leaf];
+          else
+            count += lib_inv_area;
+        }
+        /* Recursive referencing if leaf was not referenced */
+        if ( node_match[leaf].map_refs[2]++ == 0u )
+        {
+          auto const& best_cut = cuts[leaf][node_match[leaf].best_cut[leaf_phase]];
+          count += cut_ref<SwitchActivity>( best_cut, ntk.index_to_node( leaf ), leaf_phase );
+        }
+      }
+      else
+      {
+        /* separate matches per phase: recurse when the requested phase was not referenced */
+        ++node_match[leaf].map_refs[2];
+        if ( node_match[leaf].map_refs[leaf_phase]++ == 0u )
+        {
+          auto const& best_cut = cuts[leaf][node_match[leaf].best_cut[leaf_phase]];
+          count += cut_ref<SwitchActivity>( best_cut, ntk.index_to_node( leaf ), leaf_phase );
+        }
+      }
+    }
+    return count;
+  }
+
+  /* Dereferences the leaves of `cut`, selected for `phase` of node `n`, and
+   * returns the cost removed from the cover.
+   *
+   * The cost starts from the node itself (its switching activity when
+   * `SwitchActivity` is set, the area stored for `phase` otherwise) and
+   * accumulates the cost of every leaf whose reference counter drops to
+   * zero, recursively, plus the inverter cost of leaves no longer needed in
+   * the phase they do not implement. The reference counters `map_refs` of
+   * the visited leaves are decremented.
+   */
+  template<bool SwitchActivity>
+  float cut_deref( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
+  {
+    auto const& node_data = node_match[ntk.node_to_index( n )];
+    float count;
+
+    if constexpr ( SwitchActivity )
+      count = switch_activity[ntk.node_to_index( n )];
+    else
+      count = node_data.area[phase];
+
+    /* don't touch box */
+    if constexpr ( has_is_dont_touch_v<Ntk> )
+    {
+      /* the leaves of a don't touch node are not dereferenced */
+      if ( ntk.is_dont_touch( n ) )
+      {
+        return count;
+      }
+    }
+
+    uint8_t ctr = 0;
+    for ( auto leaf : cut )
+    {
+      /* compute leaf phase using the current gate */
+      uint8_t leaf_phase = ( node_data.phase[phase] >> ctr++ ) & 1;
+
+      if ( ntk.is_constant( ntk.index_to_node( leaf ) ) )
+      {
+        continue;
+      }
+      else if ( ntk.is_pi( ntk.index_to_node( leaf ) ) )
+      {
+        /* dereference PIs, add inverter cost for negative phase */
+        if ( leaf_phase == 1u )
+        {
+          /* the inverter is released by the last reference to the negative phase */
+          if ( --node_match[leaf].map_refs[1] == 0u )
+          {
+            if constexpr ( SwitchActivity )
+              count += switch_activity[leaf];
+            else
+              count += lib_inv_area;
+          }
+          --node_match[leaf].map_refs[2];
+        }
+        else
+        {
+          --node_match[leaf].map_refs[0];
+          --node_match[leaf].map_refs[2];
+        }
+        continue;
+      }
+
+      if ( node_match[leaf].same_match )
+      {
+        /* Add inverter area if it is used only by the current gate and leaf node is implemented in the opposite phase */
+        if ( --node_match[leaf].map_refs[leaf_phase] == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr )
+        {
+          if constexpr ( SwitchActivity )
+            count += switch_activity[leaf];
+          else
+            count += lib_inv_area;
+        }
+        /* Recursive dereferencing */
+        if ( --node_match[leaf].map_refs[2] == 0u )
+        {
+          auto const& best_cut = cuts[leaf][node_match[leaf].best_cut[leaf_phase]];
+          count += cut_deref<SwitchActivity>( best_cut, ntk.index_to_node( leaf ), leaf_phase );
+        }
+      }
+      else
+      {
+        /* separate matches per phase: recurse when the requested phase is no longer referenced */
+        --node_match[leaf].map_refs[2];
+        if ( --node_match[leaf].map_refs[leaf_phase] == 0u )
+        {
+          auto const& best_cut = cuts[leaf][node_match[leaf].best_cut[leaf_phase]];
+          count += cut_deref<SwitchActivity>( best_cut, ntk.index_to_node( leaf ), leaf_phase );
+        }
+      }
+    }
+    return count;
+  }
+
+  /* Measures the cost of selecting `cut` for `phase` of node `n` without
+   * leaving any trace in the cover: the leaves are referenced through
+   * `cut_ref_visit`, which logs them in `tmp_visited`, and every logged
+   * reference is then undone. */
+  template<bool SwitchActivity>
+  float cut_measure_mffc( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
+  {
+    tmp_visited.clear();
+
+    float const measured = cut_ref_visit<SwitchActivity>( cut, n, phase );
+
+    /* undo the references: each entry packs the leaf index (upper bits) and its phase (bit 0) */
+    for ( auto entry : tmp_visited )
+    {
+      uint32_t const leaf_index = entry >> 1;
+      auto& leaf_data = node_match[leaf_index];
+      --leaf_data.map_refs[2];
+      --leaf_data.map_refs[entry & 1];
+    }
+
+    return measured;
+  }
+
+  /* References `cut` as the implementation of node `n` in phase `phase` and
+   * returns the accumulated cost.
+   *
+   * The cost starts from the switching activity of `n` (SwitchActivity) or from
+   * `node_match[n].area[phase]`, and grows with the inverter cost and the
+   * recursive cost of leaves that become referenced by this call.
+   * Every non-constant leaf is appended to `tmp_visited`, encoded as
+   * ( leaf << 1 ) | leaf_phase, so that callers can undo the reference counts.
+   */
+  template<bool SwitchActivity>
+  float cut_ref_visit( cut_t const& cut, node<Ntk> const& n, uint8_t phase )
+  {
+    auto const& node_data = node_match[ntk.node_to_index( n )];
+    float count;
+
+    if constexpr ( SwitchActivity )
+      count = switch_activity[ntk.node_to_index( n )];
+    else
+      count = node_data.area[phase];
+
+    /* don't touch box */
+    /* the leaves of a don't touch node are not referenced */
+    if constexpr ( has_is_dont_touch_v<Ntk> )
+    {
+      if ( ntk.is_dont_touch( n ) )
+      {
+        return count;
+      }
+    }
+
+    /* `ctr` is the position of the current leaf in the cut */
+    uint8_t ctr = 0;
+    for ( auto leaf : cut )
+    {
+      /* compute leaf phase using the current gate */
+      uint8_t leaf_phase = ( node_data.phase[phase] >> ctr++ ) & 1;
+
+      /* constants are neither recorded nor referenced */
+      if ( ntk.is_constant( ntk.index_to_node( leaf ) ) )
+      {
+        continue;
+      }
+
+      /* add to visited */
+      tmp_visited.push_back( ( static_cast<uint64_t>( leaf ) << 1 ) | leaf_phase );
+
+      if ( ntk.is_pi( ntk.index_to_node( leaf ) ) )
+      {
+        /* reference PIs, add inverter cost for negative phase */
+        if ( leaf_phase == 1u )
+        {
+          /* the inverter is paid only by the first reference to the negative phase */
+          if ( node_match[leaf].map_refs[1]++ == 0u )
+          {
+            if constexpr ( SwitchActivity )
+              count += switch_activity[leaf];
+            else
+              count += lib_inv_area;
+          }
+          ++node_match[leaf].map_refs[2];
+        }
+        else
+        {
+          ++node_match[leaf].map_refs[0];
+          ++node_match[leaf].map_refs[2];
+        }
+        continue;
+      }
+
+      if ( node_match[leaf].same_match )
+      {
+        /* Add inverter area if not present yet and leaf node is implemented in the opposite phase */
+        if ( node_match[leaf].map_refs[leaf_phase]++ == 0u && node_match[leaf].best_supergate[leaf_phase] == nullptr )
+        {
+          if constexpr ( SwitchActivity )
+            count += switch_activity[leaf];
+          else
+            count += lib_inv_area;
+        }
+        /* Recursive referencing if leaf was not referenced */
+        if ( node_match[leaf].map_refs[2]++ == 0u )
+        {
+          auto const& best_cut = cuts[leaf][node_match[leaf].best_cut[leaf_phase]];
+          count += cut_ref_visit<SwitchActivity>( best_cut, ntk.index_to_node( leaf ), leaf_phase );
+        }
+      }
+      else
+      {
+        /* different matches per phase: recurse when this phase gets its first reference */
+        ++node_match[leaf].map_refs[2];
+        if ( node_match[leaf].map_refs[leaf_phase]++ == 0u )
+        {
+          auto const& best_cut = cuts[leaf][node_match[leaf].best_cut[leaf_phase]];
+          count += cut_ref_visit<SwitchActivity>( best_cut, ntk.index_to_node( leaf ), leaf_phase );
+        }
+      }
+    }
+    return count;
+  }
+#pragma endregion
+
+#pragma region Initialize and dump the mapped network
+  /* Accounts for the buffers needed by POs that are driven directly by a
+   * non-complemented PI.
+   *
+   * Does nothing when the library has no buffer (`lib_buf_id == UINT32_MAX`).
+   * For each such PO, `area` grows by `lib_buf_area` and `delay` is updated.
+   * In verbose mode a "Buffering" line is appended to `st.round_stats` if at
+   * least one buffer was counted.
+   */
+  void insert_buffers()
+  {
+    if ( lib_buf_id == UINT32_MAX )
+      return;
+
+    double const area_old = area;
+    bool buffers = false;
+
+    ntk.foreach_po( [&]( auto const& f ) {
+      auto const& n = ntk.get_node( f );
+      if ( !ntk.is_constant( n ) && ntk.is_pi( n ) && !ntk.is_complemented( f ) )
+      {
+        area += lib_buf_area;
+        /* NOTE(review): the buffer delay is estimated with `lib_inv_delay` — confirm this is intended */
+        delay = std::max( delay, node_match[ntk.node_to_index( n )].arrival[0] + lib_inv_delay );
+        buffers = true;
+      }
+    } );
+
+    /* round stats */
+    if ( ps.verbose && buffers )
+    {
+      /* a zero starting area would turn the relative gain into inf/NaN: report 0 instead */
+      float const area_gain = ( area_old != 0.0 ) ? float( ( area_old - area ) / area_old * 100 ) : 0.0f;
+
+      st.round_stats.push_back( fmt::format( "[i] Buffering: Delay = {:>12.2f}  Area = {:>12.2f}  Gain = {:>5.2f} %  Inverters = {:>5}  Time = {:>5.2f}\n", delay, area, area_gain, inv, to_seconds( clock::now() - time_begin ) ) );
+    }
+  }
+
+  /* Creates the destination k-LUT network (bound to the library gates) and the
+   * map from source node indices to destination signals, one per phase.
+   * The map is seeded with both constants and one new PI per source PI.
+   */
+  std::pair<binding_view<klut_network>, klut_map> initialize_map_network()
+  {
+    binding_view<klut_network> dest( library.get_gates() );
+    klut_map old2new;
+
+    /* constant node: phase 0 is constant false, phase 1 is constant true */
+    auto const const_index = ntk.node_to_index( ntk.get_node( ntk.get_constant( false ) ) );
+    auto& const_signals = old2new[const_index];
+    const_signals[0] = dest.get_constant( false );
+    const_signals[1] = dest.get_constant( true );
+
+    /* primary inputs: only the positive phase exists at this point */
+    ntk.foreach_pi( [&]( auto const& pi ) {
+      old2new[ntk.node_to_index( pi )][0] = dest.create_pi();
+    } );
+
+    return { dest, old2new };
+  }
+
+  /* Creates the destination block network (bound to the library cells) and the
+   * map from source node indices to destination signals, one per phase.
+   * The map is seeded with both constants and one new PI per source PI.
+   */
+  std::pair<cell_view<block_network>, block_map> initialize_block_network()
+  {
+    cell_view<block_network> dest( library.get_cells() );
+    block_map old2new;
+
+    /* constant node: phase 0 is constant false, phase 1 is constant true */
+    auto const const_index = ntk.node_to_index( ntk.get_node( ntk.get_constant( false ) ) );
+    auto& const_signals = old2new[const_index];
+    const_signals[0] = dest.get_constant( false );
+    const_signals[1] = dest.get_constant( true );
+
+    /* primary inputs: only the positive phase exists at this point */
+    ntk.foreach_pi( [&]( auto const& pi ) {
+      old2new[ntk.node_to_index( pi )][0] = dest.create_pi();
+    } );
+
+    return { dest, old2new };
+  }
+
+  /* Fills `topo_order`. Without multi-output matches the order is the one of a
+   * `topo_view` over the network; otherwise `multi_init_topo_order` is used.
+   */
+  void init_topo_order()
+  {
+    topo_order.reserve( ntk.size() );
+
+    if ( multi_node_match.size() == 0 )
+    {
+      /* plain topological order */
+      topo_view<Ntk>( ntk ).foreach_node( [this]( auto node ) {
+        topo_order.push_back( node );
+      } );
+    }
+    else
+    {
+      multi_init_topo_order();
+    }
+  }
+
+  /* Builds the mapped network `res` from the selected cover.
+   *
+   * Nodes are processed in `topo_order`. `old2new` maps a source node index to
+   * the destination signals of its two phases and is extended while gates,
+   * inverters, and don't touch boxes are created. POs are created at the end,
+   * adding a buffer for POs driven by a non-complemented PI when the library
+   * has one. Finally area, delay, multi-output gate count, and (if
+   * `ps.eswp_rounds` is set) switching power are written to `st`.
+   */
+  void finalize_cover( binding_view<klut_network>& res, klut_map& old2new )
+  {
+    uint32_t multioutput_count = 0;
+
+    for ( auto const& n : topo_order )
+    {
+      auto index = ntk.node_to_index( n );
+      auto const& node_data = node_match[index];
+
+      /* tells if the match of `index` in phase `p` counts as a multi-output gate */
+      auto const counts_as_multioutput = [&]( unsigned p ) {
+        return ps.map_multioutput && node_tuple_match[index] < UINT32_MAX - 1 && node_data.multioutput_match[p];
+      };
+
+      /* add inverter at PI if needed */
+      if ( ntk.is_constant( n ) )
+      {
+        /* constants are processed further only if they have a match */
+        if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr )
+          continue;
+      }
+      else if ( ntk.is_pi( n ) )
+      {
+        if ( node_data.map_refs[1] > 0 )
+        {
+          old2new[index][1] = res.create_not( old2new[index][0] );
+          res.add_binding( res.get_node( old2new[index][1] ), lib_inv_id );
+        }
+        continue;
+      }
+
+      /* continue if cut is not in the cover */
+      if ( node_data.map_refs[2] == 0u )
+        continue;
+
+      /* don't touch box */
+      if constexpr ( has_is_dont_touch_v<Ntk> )
+      {
+        if ( ntk.is_dont_touch( n ) )
+        {
+          clone_box( res, old2new, index );
+          continue;
+        }
+      }
+
+      /* start from the phase that has a gate assigned */
+      unsigned phase = ( node_data.best_supergate[0] != nullptr ) ? 0 : 1;
+
+      /* add used cut */
+      if ( node_data.same_match || node_data.map_refs[phase] > 0 )
+      {
+        create_lut_for_gate( res, old2new, index, phase );
+
+        /* add inverted version if used */
+        if ( node_data.same_match && node_data.map_refs[phase ^ 1] > 0 )
+        {
+          old2new[index][phase ^ 1] = res.create_not( old2new[index][phase] );
+          res.add_binding( res.get_node( old2new[index][phase ^ 1] ), lib_inv_id );
+        }
+
+        /* count multioutput gates */
+        if ( counts_as_multioutput( phase ) )
+        {
+          ++multioutput_count;
+        }
+      }
+
+      phase = phase ^ 1;
+      /* add the optional other match if used */
+      if ( !node_data.same_match && node_data.map_refs[phase] > 0 )
+      {
+        create_lut_for_gate( res, old2new, index, phase );
+
+        /* count multioutput gates */
+        if ( counts_as_multioutput( phase ) )
+        {
+          ++multioutput_count;
+        }
+      }
+    }
+
+    /* create POs */
+    ntk.foreach_po( [&]( auto const& f ) {
+      if ( ntk.is_complemented( f ) )
+      {
+        res.create_po( old2new[ntk.node_to_index( ntk.get_node( f ) )][1] );
+      }
+      else if ( !ntk.is_constant( ntk.get_node( f ) ) && ntk.is_pi( ntk.get_node( f ) ) && lib_buf_id != UINT32_MAX )
+      {
+        /* create buffers for POs */
+        static uint64_t _buf = 0x2;
+        kitty::dynamic_truth_table tt_buf( 1 );
+        kitty::create_from_words( tt_buf, &_buf, &_buf + 1 );
+        const auto buf = res.create_node( { old2new[ntk.node_to_index( ntk.get_node( f ) )][0] }, tt_buf );
+        res.create_po( buf );
+        res.add_binding( res.get_node( buf ), lib_buf_id );
+      }
+      else
+      {
+        res.create_po( old2new[ntk.node_to_index( ntk.get_node( f ) )][0] );
+      }
+    } );
+
+    /* write final results */
+    st.area = area;
+    st.delay = delay;
+    st.multioutput_gates = multioutput_count;
+    if ( ps.eswp_rounds )
+      st.power = compute_switching_power();
+  }
+
+  /* Builds the mapped block network `res` from the selected cover.
+   *
+   * Same flow as `finalize_cover`, but gates are bound to standard cells: a
+   * translation table from gate ids to cell ids is derived from the library of
+   * `res`. Matches flagged as multi-output are instantiated as one block via
+   * `create_block_for_gate`. Finally area, delay, multi-output gate count, and
+   * (if `ps.eswp_rounds` is set) switching power are written to `st`.
+   */
+  void finalize_cover_block( cell_view<block_network>& res, block_map& old2new )
+  {
+    uint32_t multioutput_count = 0;
+
+    /* get standard cells */
+    std::vector<standard_cell> const& lib = res.get_library();
+
+    /* get translation ID from GENLIB to STD_CELL */
+    std::vector<uint32_t> genlib_to_cell( library.get_gates().size() );
+    for ( standard_cell const& cell : lib )
+    {
+      for ( gate const& g : cell.gates )
+      {
+        genlib_to_cell[g.id] = cell.id;
+      }
+    }
+
+    for ( auto const& n : topo_order )
+    {
+      auto index = ntk.node_to_index( n );
+      auto const& node_data = node_match[index];
+
+      /* add inverter at PI if needed */
+      if ( ntk.is_constant( n ) )
+      {
+        /* constants are processed further only if they have a match */
+        if ( node_data.best_supergate[0] == nullptr && node_data.best_supergate[1] == nullptr )
+          continue;
+      }
+      else if ( ntk.is_pi( n ) )
+      {
+        if ( node_data.map_refs[1] > 0 )
+        {
+          old2new[index][1] = res.create_not( old2new[index][0] );
+          res.add_cell( res.get_node( old2new[index][1] ), genlib_to_cell[lib_inv_id] );
+        }
+        continue;
+      }
+
+      /* continue if cut is not in the cover */
+      if ( node_data.map_refs[2] == 0u )
+        continue;
+
+      /* don't touch box */
+      if constexpr ( has_is_dont_touch_v<Ntk> )
+      {
+        if ( ntk.is_dont_touch( n ) )
+        {
+          clone_box2( res, old2new, index, genlib_to_cell );
+          continue;
+        }
+      }
+
+      /* start from the phase that has a gate assigned */
+      unsigned phase = ( node_data.best_supergate[0] != nullptr ) ? 0 : 1;
+
+      /* add used cut */
+      if ( node_data.same_match || node_data.map_refs[phase] > 0 )
+      {
+        /* create multioutput gates */
+        if ( ps.map_multioutput && node_data.multioutput_match[phase] )
+        {
+          assert( node_data.same_match == true );
+
+          /* presumably only one node of the tuple satisfies this test, so that
+           * the block is instantiated once — TODO confirm */
+          if ( node_tuple_match[index] < UINT32_MAX - 1 )
+          {
+            ++multioutput_count;
+            create_block_for_gate( res, old2new, index, phase, genlib_to_cell );
+          }
+          continue;
+        }
+
+        create_lut_for_gate2( res, old2new, index, phase, genlib_to_cell );
+
+        /* add inverted version if used */
+        if ( node_data.same_match && node_data.map_refs[phase ^ 1] > 0 )
+        {
+          old2new[index][phase ^ 1] = res.create_not( old2new[index][phase] );
+          res.add_cell( res.get_node( old2new[index][phase ^ 1] ), genlib_to_cell[lib_inv_id] );
+        }
+      }
+
+      phase = phase ^ 1;
+      /* add the optional other match if used */
+      if ( !node_data.same_match && node_data.map_refs[phase] > 0 )
+      {
+        /* multi-output matches require `same_match`; the check must also hold
+         * (trivially) when multi-output mapping is disabled */
+        assert( !ps.map_multioutput || !node_data.multioutput_match[phase] );
+        create_lut_for_gate2( res, old2new, index, phase, genlib_to_cell );
+      }
+    }
+
+    /* create POs */
+    ntk.foreach_po( [&]( auto const& f ) {
+      if ( ntk.is_complemented( f ) )
+      {
+        res.create_po( old2new[ntk.node_to_index( ntk.get_node( f ) )][1] );
+      }
+      else if ( !ntk.is_constant( ntk.get_node( f ) ) && ntk.is_pi( ntk.get_node( f ) ) && lib_buf_id != UINT32_MAX )
+      {
+        /* create buffers for POs */
+        static uint64_t _buf = 0x2;
+        kitty::dynamic_truth_table tt_buf( 1 );
+        kitty::create_from_words( tt_buf, &_buf, &_buf + 1 );
+        const auto buf = res.create_node( { old2new[ntk.node_to_index( ntk.get_node( f ) )][0] }, tt_buf );
+        res.create_po( buf );
+        res.add_cell( res.get_node( buf ), genlib_to_cell[lib_buf_id] );
+      }
+      else
+      {
+        res.create_po( old2new[ntk.node_to_index( ntk.get_node( f ) )][0] );
+      }
+    } );
+
+    /* write final results */
+    st.area = area;
+    st.delay = delay;
+    st.multioutput_gates = multioutput_count;
+    if ( ps.eswp_rounds )
+      st.power = compute_switching_power();
+  }
+
+  /* Instantiates in `res` the gate matched to node `index` in phase `phase`
+   * and stores the resulting signal in `old2new[index][phase]`.
+   * Supergates are expanded recursively into their sub-gates.
+   */
+  void create_lut_for_gate( binding_view<klut_network>& res, klut_map& old2new, uint32_t index, unsigned phase )
+  {
+    auto const& match = node_match[index];
+    auto const& supergate = match.best_supergate[phase];
+    auto const& gate = supergate->root;
+    auto const& best_cut = cuts[index][match.best_cut[phase]];
+
+    /* permutate and negate to obtain the matched gate truth table */
+    std::vector<signal<klut_network>> children( gate->num_vars );
+
+    auto pin = 0u;
+    for ( auto leaf : best_cut )
+    {
+      /* leaves beyond the gate inputs are not connected */
+      if ( pin >= gate->num_vars )
+        break;
+      auto const leaf_phase = ( match.phase[phase] >> pin ) & 1;
+      children[supergate->permutation[pin]] = old2new[leaf][leaf_phase];
+      ++pin;
+    }
+
+    if ( gate->is_super )
+    {
+      /* supergate, create sub-gates */
+      old2new[index][phase] = create_lut_for_gate_rec( res, *gate, children );
+      return;
+    }
+
+    /* simple gate: create the node and bind it */
+    auto const f = res.create_node( children, gate->function );
+    res.add_binding( res.get_node( f ), gate->root->id );
+
+    /* add the node in the data structure */
+    old2new[index][phase] = f;
+  }
+
+  /* Recursively instantiates the composed gate `gate` in `res`.
+   * A fanin without a root gate is a terminal and is taken from `children`
+   * at position `fanin->id`. Returns the signal of the created root node.
+   */
+  signal<klut_network> create_lut_for_gate_rec( binding_view<klut_network>& res, composed_gate<NInputs> const& gate, std::vector<signal<klut_network>> const& children )
+  {
+    std::vector<signal<klut_network>> children_local;
+    children_local.reserve( gate.fanin.size() );
+
+    for ( auto const fanin : gate.fanin )
+    {
+      /* terminal condition: the fanin is an input of the supergate */
+      bool const is_terminal = ( fanin->root == nullptr );
+      children_local.push_back( is_terminal ? children[fanin->id] : create_lut_for_gate_rec( res, *fanin, children ) );
+    }
+
+    auto const root_signal = res.create_node( children_local, gate.root->function );
+    res.add_binding( res.get_node( root_signal ), gate.root->id );
+    return root_signal;
+  }
+
+  /* Block-network counterpart of `create_lut_for_gate`: instantiates in `res`
+   * the gate matched to node `index` in phase `phase`, binds it to the cell
+   * given by `genlib_to_cell`, and stores the signal in `old2new[index][phase]`.
+   */
+  void create_lut_for_gate2( cell_view<block_network>& res, block_map& old2new, uint32_t index, unsigned phase, std::vector<uint32_t> const& genlib_to_cell )
+  {
+    auto const& match = node_match[index];
+    auto const& supergate = match.best_supergate[phase];
+    auto const& gate = supergate->root;
+    auto const& best_cut = cuts[index][match.best_cut[phase]];
+
+    /* permutate and negate to obtain the matched gate truth table */
+    std::vector<signal<block_network>> children( gate->num_vars );
+
+    auto pin = 0u;
+    for ( auto leaf : best_cut )
+    {
+      /* leaves beyond the gate inputs are not connected */
+      if ( pin >= gate->num_vars )
+        break;
+      auto const leaf_phase = ( match.phase[phase] >> pin ) & 1;
+      children[supergate->permutation[pin]] = old2new[leaf][leaf_phase];
+      ++pin;
+    }
+
+    if ( gate->is_super )
+    {
+      /* supergate, create sub-gates */
+      old2new[index][phase] = create_lut_for_gate2_rec( res, *gate, children, genlib_to_cell );
+      return;
+    }
+
+    /* simple gate: create the node and bind it to its cell */
+    auto const f = res.create_node( children, gate->function );
+    res.add_cell( res.get_node( f ), genlib_to_cell.at( gate->root->id ) );
+
+    /* add the node in the data structure */
+    old2new[index][phase] = f;
+  }
+
+  /* Recursively instantiates the composed gate `gate` in the block network.
+   * A fanin without a root gate is a terminal and is taken from `children`
+   * at position `fanin->id`. Returns the signal of the created root node.
+   */
+  signal<block_network> create_lut_for_gate2_rec( cell_view<block_network>& res, composed_gate<NInputs> const& gate, std::vector<signal<block_network>> const& children, std::vector<uint32_t> const& genlib_to_cell )
+  {
+    std::vector<signal<block_network>> children_local;
+    children_local.reserve( gate.fanin.size() );
+
+    for ( auto const fanin : gate.fanin )
+    {
+      /* terminal condition: the fanin is an input of the supergate */
+      bool const is_terminal = ( fanin->root == nullptr );
+      children_local.push_back( is_terminal ? children[fanin->id] : create_lut_for_gate2_rec( res, *fanin, children, genlib_to_cell ) );
+    }
+
+    auto const root_signal = res.create_node( children_local, gate.root->function );
+    res.add_cell( res.get_node( root_signal ), genlib_to_cell.at( gate.root->id ) );
+    return root_signal;
+  }
+
+  /* Instantiates in `res` the multi-output cell matched to node `index` in
+   * phase `phase`.
+   *
+   * The fanins are taken from the best cut of `index`. The nodes of the tuple
+   * `multi_node_match[node_tuple_match[index]][0]` are re-ordered to follow the
+   * output order of the cell, one block node is created, and `old2new` is
+   * updated for every output node (plus an inverter for outputs whose opposite
+   * phase is referenced).
+   */
+  void create_block_for_gate( cell_view<block_network>& res, block_map& old2new, uint32_t index, unsigned phase, std::vector<uint32_t> const& genlib_to_cell )
+  {
+    std::vector<standard_cell> const& lib = res.get_library();
+    composed_gate<NInputs> const* local_gate = node_match[index].best_supergate[phase]->root;
+    standard_cell const& cell = lib[genlib_to_cell.at( local_gate->root->id )];
+
+    assert( !local_gate->is_super );
+    auto const& best_cut = cuts[index][node_match[index].best_cut[phase]];
+
+    /* permutate and negate to obtain the matched gate truth table */
+    std::vector<signal<block_network>> children( cell.gates.front().num_vars );
+
+    /* output negations have already been assigned by the mapper */
+    auto ctr = 0u;
+    for ( auto l : best_cut )
+    {
+      if ( ctr >= local_gate->num_vars )
+        break;
+      children[node_match[index].best_supergate[phase]->permutation[ctr]] = old2new[l][( node_match[index].phase[phase] >> ctr ) & 1];
+      ++ctr;
+    }
+
+    multi_match_t const& tuple_data = multi_node_match[node_tuple_match[index]][0];
+    std::vector<uint32_t> outputs;
+    std::vector<kitty::dynamic_truth_table> functions;
+
+    /* re-order outputs to match the ones of the cell */
+    for ( gate const& g : cell.gates )
+    {
+      /* find the correct node */
+      for ( auto j = 0; j < max_multioutput_output_size; ++j )
+      {
+        uint32_t node_index = tuple_data[j].node_index;
+        assert( node_match[node_index].same_match );
+        uint8_t node_phase = node_match[node_index].best_supergate[0] != nullptr ? 0 : 1;
+        assert( node_match[node_index].multioutput_match[node_phase] );
+
+        gate const* node_gate = node_match[node_index].best_supergate[node_phase]->root->root;
+
+        /* wrong output */
+        if ( node_gate->id != g.id )
+          continue;
+
+        /* NOTE(review): the search does not stop at the first hit; this relies on
+         * each cell output matching exactly one tuple entry (checked by the assert below) */
+        outputs.push_back( node_index );
+        functions.push_back( g.function );
+      }
+    }
+
+    assert( outputs.size() == cell.gates.size() );
+
+    /* create the block */
+    auto f = res.create_node( children, functions );
+    res.add_cell( res.get_node( f ), genlib_to_cell.at( local_gate->root->id ) );
+
+    /* `f` walks over the output pins of the block, one per entry of `outputs` */
+    for ( uint32_t s : outputs )
+    {
+      /* add inverted version if used */
+      uint8_t node_phase = node_match[s].best_supergate[0] != nullptr ? 0 : 1;
+      assert( node_match[s].same_match );
+
+      /* add the node in the data structure */
+      old2new[s][node_phase] = f;
+
+      if ( node_match[s].map_refs[node_phase ^ 1] > 0 )
+      {
+        old2new[s][node_phase ^ 1] = res.create_not( f );
+        res.add_cell( res.get_node( old2new[s][node_phase ^ 1] ), genlib_to_cell.at( lib_inv_id ) );
+      }
+
+      f = res.next_output_pin( f );
+    }
+  }
+
+  /* Copies the don't touch node `index` into `res` as it is.
+   * The fanins are translated through `old2new`, the node function is kept,
+   * an inverter is added when the negative phase is referenced, and the
+   * binding of the source node (if any) is carried over.
+   */
+  void clone_box( binding_view<klut_network>& res, klut_map& old2new, uint32_t index )
+  {
+    node<Ntk> n = ntk.index_to_node( index );
+
+    /* translate the fanins */
+    std::vector<signal<klut_network>> fanins;
+    ntk.foreach_fanin( n, [&]( auto const& fi ) {
+      auto const polarity = ntk.is_complemented( fi ) ? 1 : 0;
+      fanins.push_back( old2new[ntk.get_node( fi )][polarity] );
+    } );
+
+    /* create the node */
+    auto const& function = ntk.node_function( n );
+    auto box_signal = res.create_node( fanins, function );
+
+    /* add the node in the data structure */
+    old2new[index][0] = box_signal;
+    if ( node_match[index].map_refs[1] )
+    {
+      /* negative phase is used: derive it with an inverter */
+      old2new[index][1] = res.create_not( box_signal );
+      res.add_binding( res.get_node( old2new[index][1] ), lib_inv_id );
+    }
+
+    /* keep the original binding */
+    if constexpr ( has_has_binding_v<Ntk> )
+    {
+      if ( ntk.has_binding( n ) )
+      {
+        res.add_binding( res.get_node( box_signal ), ntk.get_binding_index( n ) );
+      }
+    }
+  }
+
+  /* Copies the don't touch node `index` into the block network `res`.
+   *
+   * If the source node is bound to a gate that belongs to a multi-output cell,
+   * the whole cell is created (partially dangling) and the output pin that
+   * corresponds to the bound gate is connected. Otherwise a single-output node
+   * with the function of the source node is created and, if the source node
+   * has a binding, bound to the corresponding cell. In both cases an inverter
+   * is added when the negative phase is referenced.
+   *
+   * NOTE(review): `old2new` is declared as `klut_map` while the caller passes a
+   * `block_map` and the other block-network helpers take `block_map` — confirm
+   * the intended parameter type.
+   */
+  void clone_box2( cell_view<block_network>& res, klut_map& old2new, uint32_t index, std::vector<uint32_t> const& genlib_to_cell )
+  {
+    node<Ntk> n = ntk.index_to_node( index );
+    std::vector<signal<block_network>> children;
+
+    ntk.foreach_fanin( n, [&]( auto const& f ) {
+      children.push_back( old2new[ntk.get_node( f )][ntk.is_complemented( f ) ? 1 : 0] );
+    } );
+
+    /* check if multi-output */
+    std::vector<standard_cell> const& lib = res.get_library();
+    if constexpr ( has_has_binding_v<Ntk> )
+    {
+      bool is_multioutput = false;
+      if ( ntk.has_binding( n ) )
+      {
+        uint32_t cell_id = genlib_to_cell.at( ntk.get_binding_index( n ) );
+        if ( lib.at( cell_id ).gates.size() > 1 )
+          is_multioutput = true;
+      }
+
+      /* create the multioutput node (partially dangling) */
+      if ( is_multioutput )
+      {
+        auto const bound_gate_id = ntk.get_binding_index( n );
+        standard_cell const& cell = lib.at( genlib_to_cell.at( bound_gate_id ) );
+        std::vector<kitty::dynamic_truth_table> functions;
+        for ( auto const& g : cell.gates )
+        {
+          functions.push_back( g.function );
+        }
+
+        auto f = res.create_node( children, functions );
+
+        /* find and connect the correct pin: advance until the output gate is
+         * the one the source node is bound to (gate ids and cell ids are
+         * different id spaces, so the gate id is compared to the binding) */
+        for ( auto const& g : cell.gates )
+        {
+          if ( g.id == bound_gate_id )
+            break;
+          f = res.next_output_pin( f );
+        }
+
+        old2new[index][0] = f;
+        res.add_cell( res.get_node( f ), cell.id );
+        if ( node_match[index].map_refs[1] )
+        {
+          old2new[index][1] = res.create_not( f );
+          res.add_cell( res.get_node( old2new[index][1] ), genlib_to_cell.at( lib_inv_id ) );
+        }
+        return;
+      }
+    }
+
+    /* create the single-output node */
+    auto const& tt = ntk.node_function( n );
+    auto f = res.create_node( children, tt );
+
+    /* add the node in the data structure */
+    old2new[index][0] = f;
+    if ( node_match[index].map_refs[1] )
+    {
+      old2new[index][1] = res.create_not( f );
+      res.add_cell( res.get_node( old2new[index][1] ), genlib_to_cell.at( lib_inv_id ) );
+    }
+
+    /* keep the original binding, translated to a cell id */
+    if constexpr ( has_has_binding_v<Ntk> )
+    {
+      if ( ntk.has_binding( n ) )
+        res.add_cell( res.get_node( f ), genlib_to_cell.at( ntk.get_binding_index( n ) ) );
+    }
+  }
+#pragma endregion
+
+#pragma region Cuts and matching utils
+  /* Matches `cut` of node `n` against the library and computes its cost.
+   *
+   * Cuts larger than `NInputs` or 6 leaves, and cuts whose function (in both
+   * polarities) has no supergate in the library, are flagged with
+   * `cut->ignore`. For the remaining cuts the supergates and input negations
+   * of both polarities are stored, and `cut->delay` / `cut->flow` are set from
+   * the best cut of each leaf: one unit of delay for non-trivial cuts on top
+   * of the latest leaf, and the leaf flows plus the cut size divided by the
+   * fanout of `n`.
+   */
+  template<bool DO_AREA>
+  void compute_cut_data( cut_t& cut, node<Ntk> const& n )
+  {
+    double best_arrival = std::numeric_limits<float>::max();
+    double best_area_flow = std::numeric_limits<float>::max();
+    cut->delay = best_arrival;
+    cut->flow = best_area_flow;
+    cut->ignore = false;
+
+    if ( cut.size() > NInputs || cut.size() > 6 )
+    {
+      /* Ignore cuts too big to be mapped using the library */
+      cut->ignore = true;
+      return;
+    }
+
+    const auto tt = cut->function;
+    const kitty::static_truth_table<6> fe = kitty::extend_to<6>( tt );
+    auto fe_canon = fe;
+
+    uint16_t negations_pos = 0;
+    uint16_t negations_neg = 0;
+
+    /* match positive polarity */
+    if constexpr ( Configuration == classification_type::p_configurations )
+    {
+      auto canon = kitty::exact_n_canonization_support( fe, cut.size() );
+      fe_canon = std::get<0>( canon );
+      negations_pos = std::get<1>( canon );
+    }
+
+    auto const supergates_pos = library.get_supergates( fe_canon );
+
+    /* match negative polarity */
+    if constexpr ( Configuration == classification_type::p_configurations )
+    {
+      auto canon = kitty::exact_n_canonization_support( ~fe, cut.size() );
+      fe_canon = std::get<0>( canon );
+      negations_neg = std::get<1>( canon );
+    }
+    else
+    {
+      fe_canon = ~fe;
+    }
+
+    auto const supergates_neg = library.get_supergates( fe_canon );
+
+    if ( supergates_pos != nullptr || supergates_neg != nullptr )
+    {
+      cut->supergates = { supergates_pos, supergates_neg };
+      cut->negations = { negations_pos, negations_neg };
+    }
+    else
+    {
+      /* Ignore not matched cuts */
+      cut->ignore = true;
+      return;
+    }
+
+    /* compute cut cost based on LUT area */
+    best_arrival = 0;
+    best_area_flow = cut.size() > 1 ? cut.size() : 0;
+
+    for ( auto leaf : cut )
+    {
+      const auto& best_leaf_cut = cuts[leaf][0];
+      best_arrival = std::max( best_arrival, best_leaf_cut->delay );
+      best_area_flow += best_leaf_cut->flow;
+    }
+
+    /* the conditional must be parenthesized: `+` binds tighter than `?:`,
+     * otherwise the whole sum becomes the condition and the delay collapses to 0 or 1 */
+    cut->delay = best_arrival + ( ( cut.size() > 1 ) ? 1 : 0 );
+    cut->flow = best_area_flow / ntk.fanout_size( n );
+  }
+
+  /* Computes the positions of the leaves of cut `sub` (subset) within the
+   * leaves of cut `sup` (superset), stores them in `lsupport`, and moves the
+   * variables of `tt` to those positions.
+   *
+   * Example:
+   *   compute_truth_table_support( {1, 3, 6}, {0, 1, 2, 3, 6, 7} ) = {1, 3, 4}
+   */
+  void compute_truth_table_support( cut_t const& sub, cut_t const& sup, TT& tt )
+  {
+    size_t num_leaves = 0;
+    auto pos = sup.begin();
+    for ( auto leaf : sub )
+    {
+      /* the search resumes from the previous hit */
+      pos = std::find( pos, sup.end(), leaf );
+      lsupport[num_leaves] = static_cast<uint8_t>( std::distance( sup.begin(), pos ) );
+      ++num_leaves;
+    }
+
+    /* swap variables in the truth table, from the last one down to the first */
+    int var = static_cast<int>( num_leaves ) - 1;
+    while ( var >= 0 )
+    {
+      assert( var <= lsupport[var] );
+      kitty::swap_inplace( tt, var, lsupport[var] );
+      --var;
+    }
+  }
+
+  /* Adds an empty cut to the cut set of node `index` and flags it as ignored. */
+  void add_zero_cut( uint32_t index )
+  {
+    /* begin == end: the iterator pair only serves to describe an empty leaf range */
+    cuts[index].add_cut( &index, &index )->ignore = true;
+  }
+
+  /* Adds to the cut set of node `index` the cut whose only leaf is the node
+   * itself; its function is the projection on variable 0 and it is flagged as
+   * ignored.
+   */
+  void add_unit_cut( uint32_t index )
+  {
+    auto& unit_cut = cuts[index].add_cut( &index, &index + 1 );
+
+    unit_cut->ignore = true;
+    kitty::create_nth_var( unit_cut->function, 0 );
+  }
+
+  /* Cheap support minimization of cut `res` with function `tt`.
+   *
+   * Succeeds (returns true) only if the variables in the functional support
+   * are exactly the first ones; in that case the trailing leaves that are not
+   * in the support are dropped from `res`. Returns false, leaving `res`
+   * untouched, when a variable outside the support precedes one inside it.
+   */
+  inline bool fast_support_minimization( TT const& tt, cut_t& res )
+  {
+    uint32_t support_mask = 0u;
+    uint32_t num_support_vars = 0u;
+    for ( uint32_t var = 0u; var < tt.num_vars(); ++var )
+    {
+      if ( !kitty::has_var( tt, var ) )
+        continue;
+      support_mask |= 1u << var;
+      ++num_support_vars;
+    }
+
+    /* has not minimized support? (the mask is not a run of low-order ones) */
+    bool const is_low_order_run = ( support_mask & ( support_mask + 1u ) ) == 0u;
+    if ( !is_low_order_run )
+    {
+      return false;
+    }
+
+    /* variables not in the support are the most significative */
+    if ( num_support_vars != res.size() )
+    {
+      std::vector<uint32_t> kept_leaves( res.begin(), res.begin() + num_support_vars );
+      res.set_leaves( kept_leaves.begin(), kept_leaves.end() );
+    }
+
+    return true;
+  }
+
+  /* Computes the function of cut `res` of node `index` from the functions of
+   * the `fanin` cuts in `vcuts`.
+   *
+   * Each fanin function is first expressed over the leaves of `res`; the node
+   * function is then evaluated on them. If truth table minimization is enabled
+   * and the fast check fails, the function is reduced to its minimum base and
+   * the leaves of `res` are updated accordingly.
+   */
+  void compute_truth_table( uint32_t index, fanin_cut_t const& vcuts, uint32_t fanin, cut_t& res )
+  {
+    for ( uint32_t i = 0; i < fanin; ++i )
+    {
+      cut_t const& fanin_cut = *vcuts[i];
+      ltruth[i] = fanin_cut->function;
+      compute_truth_table_support( fanin_cut, res, ltruth[i] );
+    }
+
+    auto tt_res = ntk.compute( ntk.index_to_node( index ), ltruth.begin(), ltruth.begin() + fanin );
+
+    /* the fast minimization runs only if minimization is requested */
+    bool const needs_min_base = ps.cut_enumeration_ps.minimize_truth_table && !fast_support_minimization( tt_res, res );
+    if ( needs_min_base )
+    {
+      const auto support = kitty::min_base_inplace( tt_res );
+
+      /* keep only the leaves that are still in the support */
+      std::vector<uint32_t> const old_leaves( res.begin(), res.end() );
+      std::vector<uint32_t> new_leaves;
+      new_leaves.reserve( support.size() );
+      for ( auto const var : support )
+      {
+        new_leaves.push_back( old_leaves[var] );
+      }
+      res.set_leaves( new_leaves.begin(), new_leaves.end() );
+    }
+
+    res->function = tt_res;
+  }
+#pragma endregion
+
+  /* Returns true if the candidate (arrival, area_flow, size) is better than
+   * the current best.
+   *
+   * With DO_AREA the area flow is the primary criterion and the arrival time
+   * the secondary one; otherwise the roles are swapped. Values that differ by
+   * at most `epsilon` are considered equal, and the size breaks remaining ties.
+   */
+  template<bool DO_AREA>
+  inline bool compare_map( double arrival, double best_arrival, double area_flow, double best_area_flow, uint32_t size, uint32_t best_size )
+  {
+    double const primary = DO_AREA ? area_flow : arrival;
+    double const best_primary = DO_AREA ? best_area_flow : best_arrival;
+    double const secondary = DO_AREA ? arrival : area_flow;
+    double const best_secondary = DO_AREA ? best_arrival : best_area_flow;
+
+    /* primary criterion */
+    if ( primary < best_primary - epsilon )
+      return true;
+    if ( primary > best_primary + epsilon )
+      return false;
+
+    /* secondary criterion */
+    if ( secondary < best_secondary - epsilon )
+      return true;
+    if ( secondary > best_secondary + epsilon )
+      return false;
+
+    /* tie-break on the size */
+    return size < best_size;
+  }
+
+  /* Sums the switching activity over the elements of the cover.
+   *
+   * A PI contributes once if its negative phase is referenced. A node in the
+   * cover contributes once for each implemented phase gate and once more for
+   * an inverter when a single match serves both referenced phases.
+   */
+  double compute_switching_power()
+  {
+    double power = 0.0f;
+
+    for ( auto const& n : topo_order )
+    {
+      const auto index = ntk.node_to_index( n );
+      auto& match = node_match[index];
+
+      if ( ntk.is_constant( n ) )
+      {
+        /* unmatched constants do not contribute */
+        bool const unmatched = match.best_supergate[0] == nullptr && match.best_supergate[1] == nullptr;
+        if ( unmatched )
+          continue;
+      }
+      else if ( ntk.is_pi( n ) )
+      {
+        if ( match.map_refs[1] > 0 )
+        {
+          power += switch_activity[ntk.node_to_index( n )];
+        }
+        continue;
+      }
+
+      /* continue if cut is not in the cover */
+      if ( match.map_refs[2] == 0u )
+        continue;
+
+      unsigned const first_phase = ( match.best_supergate[0] != nullptr ) ? 0 : 1;
+      unsigned const second_phase = first_phase ^ 1;
+
+      if ( match.same_match || match.map_refs[first_phase] > 0 )
+      {
+        power += switch_activity[ntk.node_to_index( n )];
+
+        /* the opposite phase is derived from the same match */
+        if ( match.same_match && match.map_refs[second_phase] > 0 )
+        {
+          power += switch_activity[ntk.node_to_index( n )];
+        }
+      }
+
+      /* the opposite phase has its own match */
+      if ( !match.same_match && match.map_refs[second_phase] > 0 )
+      {
+        power += switch_activity[ntk.node_to_index( n )];
+      }
+    }
+
+    return power;
+  }
+
+#pragma region multioutput
+  /* Experimental code: finds the matches to multi-output gates.
+   *
+   * Returns immediately if the library has no multi-output gate. Otherwise
+   * enumerates the cuts, groups the matching ones by leaves, combines them
+   * into candidate tuples, and finally filters and stores the tuples.
+   * The elapsed time is accumulated in `st.time_multioutput`.
+   */
+  void compute_multioutput_match()
+  {
+    stopwatch t( st.time_multioutput );
+
+    if ( library.num_multioutput_gates() == 0 )
+    {
+      return;
+    }
+
+    /* compute cuts: first simple method without proper matching */
+    cut_enumeration_params enumeration_ps;
+    enumeration_ps.minimize_truth_table = false;
+    multi_cuts_t multi_cuts = fast_cut_enumeration<Ntk, max_multioutput_cut_size, true, cut_enumeration_emap_multi_cut>( ntk, enumeration_ps );
+
+    /* cuts leaves classes */
+    multi_hash_t leaves_classes;
+    leaves_classes.reserve( 2000 );
+
+    /* Multi-output matching */
+    multi_enumerate_matches( multi_cuts, leaves_classes );
+
+    multi_single_matches_t candidate_matches;
+    candidate_matches.reserve( leaves_classes.size() );
+
+    multi_compute_matches( multi_cuts, leaves_classes, candidate_matches );
+
+    /* the filtering also adds the tuple for node mapping */
+    if ( !ps.remove_overlapping_multicuts )
+    {
+      multi_filter_and_match<false>( multi_cuts, candidate_matches );
+    }
+    else
+    {
+      multi_filter_and_match<true>( multi_cuts, candidate_matches );
+    }
+  }
+
+  /* Fills `topo_order` when multi-output matches are present.
+   *
+   * The tuples are stored as choices in a `choice_view`; constants and CIs are
+   * added first, then the remaining nodes are sorted starting from the COs
+   * through `multi_topo_sort_rec`. Visited nodes are marked with the current
+   * traversal id.
+   */
+  void multi_init_topo_order()
+  {
+    /* create and initialize a choice view to store the tuples */
+    choice_view<Ntk> choice_ntk{ ntk };
+    multi_add_choices( choice_ntk );
+
+    ntk.incr_trav_id();
+    ntk.incr_trav_id();
+
+    /* appends a node to the order unless it is already marked */
+    auto const append_if_new = [&]( auto const& node ) {
+      if ( ntk.visited( node ) != ntk.trav_id() )
+      {
+        topo_order.push_back( node );
+        ntk.set_visited( node, ntk.trav_id() );
+      }
+    };
+
+    /* add constants and CIs */
+    const auto const_zero = ntk.get_node( ntk.get_constant( false ) );
+    topo_order.push_back( const_zero );
+    ntk.set_visited( const_zero, ntk.trav_id() );
+
+    /* the second constant may be the same node as the first one */
+    append_if_new( ntk.get_node( ntk.get_constant( true ) ) );
+
+    ntk.foreach_ci( [&]( auto const& ci ) {
+      append_if_new( ci );
+    } );
+
+    /* sort topologically */
+    ntk.foreach_co( [&]( auto const& f ) {
+      if ( ntk.visited( ntk.get_node( f ) ) != ntk.trav_id() )
+      {
+        multi_topo_sort_rec( choice_ntk, ntk.get_node( f ) );
+      }
+    } );
+  }
+
+  /* Experimental code restricted to only half adders and full adders.
+   *
+   * For every gate and every cut of the gate, looks up the cut function in the
+   * library of multi-output functions. Matching cuts get the function id
+   * stored in their data and are added to `multi_cuts_classes`, keyed by their
+   * leaves (zero-padded), as a (node index, cut index) pair.
+   */
+  void multi_enumerate_matches( multi_cuts_t const& multi_cuts, multi_hash_t& multi_cuts_classes )
+  {
+    static_assert( max_multioutput_cut_size > 1 && max_multioutput_cut_size < 7 );
+
+    ntk.foreach_gate( [&]( auto const& n ) {
+      uint32_t cut_index = 0;
+      for ( auto& cut : multi_cuts.cuts( ntk.node_to_index( n ) ) )
+      {
+        kitty::static_truth_table<max_multioutput_cut_size> tt = multi_cuts.truth_table( *cut );
+        /* reduce support for matching ID */
+        uint64_t tt_id = ( cut->size() < 3 ) ? ( tt._bits & 0xF ) : tt._bits;
+        uint64_t id = library.get_multi_function_id( tt_id );
+
+        /* an id of zero means that the function is not matched */
+        if ( id )
+        {
+          ( *cut )->data.id = id;
+
+          multi_match_data data;
+          data.node_index = ntk.node_to_index( n );
+          data.cut_index = cut_index;
+
+          /* a fresh zero-initialized key per cut, so that no leaf of a
+           * previous (larger) cut can leak into the key */
+          multi_leaves_set_t leaves = { 0 };
+          uint32_t i = 0;
+          for ( auto l : *cut )
+            leaves[i++] = l;
+
+          /* add to hash table */
+          multi_cuts_classes[leaves].push_back( data );
+        }
+
+        ++cut_index;
+      }
+    } );
+  }
+
+  /* Experimental code: pairs the cuts of each leaf-set class and appends the compatible pairs to `multi_node_match_local` */
+  void multi_compute_matches( multi_cuts_t const& multi_cuts, multi_hash_t& multi_cuts_classes, multi_single_matches_t& multi_node_match_local )
+  {
+    ntk.clear_values();
+
+    /* copy set and sort by gate size: improve, too slow */
+    std::vector<std::pair<multi_leaves_set_t, multi_output_set_t>> class_list;
+    class_list.reserve( multi_cuts_classes.size() );
+    for ( auto const& it : multi_cuts_classes )
+    {
+      /* insert multiple occurring cuts */
+      if ( it.second.size() > 1 )
+        class_list.push_back( it );
+    }
+
+    std::sort( class_list.begin(), class_list.end(), []( auto const& a, auto const& b ) {
+      return a.first[2] > b.first[2]; /* descending on the third leaf entry */
+    } );
+
+    /* combine and match: specific code for 2-output cells */
+    for ( auto const& it : class_list ) /* by reference: a copy would duplicate the match vector of every class */
+    {
+      for ( uint32_t i = 0; i < it.second.size() - 1; ++i )
+      {
+        multi_match_data data_i = it.second[i];
+        uint32_t index_i = data_i.node_index;
+        uint32_t cut_index_i = data_i.cut_index;
+        auto const& cut_i = multi_cuts.cuts( index_i )[cut_index_i];
+
+        for ( uint32_t j = i + 1; j < it.second.size(); ++j )
+        {
+          multi_match_data data_j = it.second[j];
+          uint32_t index_j = data_j.node_index;
+          uint32_t cut_index_j = data_j.cut_index;
+          auto const& cut_j = multi_cuts.cuts( index_j )[cut_index_j];
+
+          /* not compatible -> TODO: change */
+          if ( cut_i->data.id == cut_j->data.id )
+            continue;
+
+          /* check compatibility */
+          if ( !multi_check_partally_dangling( index_i, index_j, cut_i ) )
+            continue;
+
+          multi_node_match_local.push_back( { data_i, data_j } );
+        }
+      }
+    }
+  }
+
+  /* Experimental code: filters the candidate pairs of `multi_node_match_local`, matches them against the library, and stores the result in `multi_cut_set` and `multi_node_match` */
+  template<bool OverlapFilter>
+  void multi_filter_and_match( multi_cuts_t const& multi_cuts, multi_single_matches_t const& multi_node_match_local )
+  {
+    multi_cut_set.reserve( multi_node_match_local.size() );
+    multi_node_match.reserve( multi_node_match_local.size() );
+
+    ntk.incr_trav_id();
+
+    for ( auto& pair : multi_node_match_local )
+    {
+      uint32_t index1 = pair[0].node_index;
+      uint32_t index2 = pair[1].node_index;
+      uint32_t cut_index1 = pair[0].cut_index;
+      uint32_t cut_index2 = pair[1].cut_index;
+      multi_cut_t const& cut1 = multi_cuts.cuts( index1 )[cut_index1];
+      multi_cut_t const& cut2 = multi_cuts.cuts( index2 )[cut_index2];
+
+      assert( index1 < index2 );
+
+      /* remove incompatible multi-output cuts */
+      bool is_new = true;
+      uint32_t insertion_index = multi_node_match.size(); /* bucket index used if the pair opens a new bucket */
+      if constexpr ( OverlapFilter )
+      {
+        if ( multi_gate_check_overlapping( index1, index2, cut1 ) )
+          continue;
+      }
+      else
+      {
+        if ( multi_gate_check_incompatible( index1, index2, is_new, insertion_index ) )
+          continue;
+        // if ( is_new && multi_gate_check_overlapping( index1, index2, cut1 ) )
+        //   continue;
+      }
+
+      /* copy cuts */
+      cut_t new_cut1, new_cut2;
+      new_cut1.set_leaves( cut1.begin(), cut1.end() );
+      new_cut2.set_leaves( cut2.begin(), cut2.end() );
+      new_cut1->function = kitty::extend_to<6>( multi_cuts.truth_table( cut1 ) );
+      new_cut2->function = kitty::extend_to<6>( multi_cuts.truth_table( cut2 ) );
+
+      /* Multi-output Boolean matching, continue if no match */
+      std::array<cut_t, max_multioutput_output_size> cut_pair = { new_cut1, new_cut2 };
+      if ( !multi_compute_cut_data( cut_pair ) )
+        continue;
+
+      /* mark multioutput gate */
+      if constexpr ( OverlapFilter )
+      {
+        multi_gate_mark_visited( index1, index2, cut1 );
+        node_tuple_match[index2] = multi_node_match.size(); /* index of the bucket pushed below (is_new stays true here) */
+      }
+      else
+      {
+        // multi_gate_mark_visited( index1, index2, cut1 );
+        multi_gate_mark_compatibility( index1, index2, insertion_index );
+      }
+
+      /* add cut */
+      multi_cut_set.push_back( cut_pair );
+
+      /* re-index data: cut_index now refers to the position of the pair in multi_cut_set */
+      multi_match_data new_data1, new_data2;
+      new_data1.node_index = index1;
+      new_data1.cut_index = multi_cut_set.size() - 1;
+      new_data2.node_index = index2;
+      new_data2.cut_index = multi_cut_set.size() - 1;
+      multi_match_t p = { new_data1, new_data2 };
+
+      /* add cuts to the correct bucket */
+      if ( is_new )
+      {
+        multi_node_match.push_back( { p } );
+      }
+      else
+      {
+        multi_node_match[insertion_index].push_back( p );
+      }
+    }
+
+    /* remove indexing for lower index for compatible overlapping cuts */
+    if constexpr ( !OverlapFilter )
+    {
+      for ( auto const& entry : multi_node_match )
+      {
+        multi_match_t const& p = entry[0];
+        node_tuple_match[p[0].node_index] = UINT32_MAX;
+      }
+    }
+  }
+
+  /* Matches a tuple of cuts against the multi-output gates of the library: returns false if there is no match, otherwise stores the supergates in the cuts and returns true */
+  bool multi_compute_cut_data( std::array<cut_t, max_multioutput_output_size>& cut_tuple )
+  {
+    std::array<kitty::static_truth_table<6>, max_multioutput_output_size> tts;
+    std::array<kitty::static_truth_table<6>, max_multioutput_output_size> tts_order;
+    std::array<size_t, max_multioutput_output_size> order = {};
+    std::array<uint16_t, max_multioutput_output_size> phase = { 0 };
+    std::array<uint8_t, max_multioutput_output_size> phase_order;
+
+    std::iota( order.begin(), order.end(), 0 );
+    /* normalize the polarity: a function whose lowest truth table bit is 1 is complemented and gets phase 1 */
+    for ( uint32_t i = 0; i < max_multioutput_output_size; ++i )
+    {
+      tts[i] = kitty::extend_to<6>( cut_tuple[i]->function );
+      if ( ( tts[i]._bits & 1 ) == 1 )
+      {
+        tts[i] = ~tts[i];
+        phase[i] = 1;
+      }
+    }
+    /* sort the outputs by truth table */
+    std::sort( order.begin(), order.end(), [&]( size_t a, size_t b ) {
+      return tts[a] < tts[b];
+    } );
+    /* permute functions and phases according to the sorted order */
+    std::transform( order.begin(), order.end(), tts_order.begin(), [&]( size_t a ) {
+      return tts[a];
+    } );
+    std::transform( order.begin(), order.end(), phase_order.begin(), [&]( size_t a ) {
+      return phase[a];
+    } );
+
+    auto const multigates_match = library.get_multi_supergates( tts_order );
+
+    /* Ignore not matched cuts */
+    if ( multigates_match == nullptr )
+      return false;
+
+    /* add cut matches */
+    for ( uint32_t i = 0; i < max_multioutput_output_size; ++i )
+    {
+      std::vector<supergate<NInputs>> const* multigate = &( ( *multigates_match )[i] );
+      cut_tuple[order[i]]->supergates[phase_order[i]] = multigate;
+    }
+
+    return true;
+  }
+
+  inline bool multi_check_partally_dangling( uint32_t index1, uint32_t index2, multi_cut_t const& cut1 )
+  {
+    /* order the pair such that index1 holds the smaller node index */
+    if ( index1 > index2 )
+      std::swap( index1, index2 );
+
+    /* reject the pair when the lower node is a direct fanin of the upper node
+     * and has a single fanout; the scan stops at the first such fanin */
+    bool rejected = false;
+    ntk.foreach_fanin( ntk.index_to_node( index2 ), [&]( auto const& fanin ) {
+      auto const child = ntk.get_node( fanin );
+      if ( ntk.node_to_index( child ) == index1 && ntk.fanout_size( child ) == 1 )
+        rejected = true;
+      return !rejected;
+    } );
+
+    if ( rejected )
+      return false;
+
+    /* otherwise the verdict is the one of is_contained_mffc, called with the
+     * upper node as root and the lower node as target */
+    node<Ntk> const upper = ntk.index_to_node( index2 );
+    node<Ntk> const lower = ntk.index_to_node( index1 );
+    bool const accepted = is_contained_mffc( upper, lower, cut1 );
+
+    return accepted;
+  }
+
+  inline bool multi_gate_check_overlapping( uint32_t index1, uint32_t index2, multi_cut_t const& cut )
+  {
+    /* The cut leaves are flagged through their value for the duration of the
+     * check: multi_mark_visited_rec stops at nodes with a nonzero value. */
+    for ( auto const leaf : cut )
+      ntk.incr_value( ntk.index_to_node( leaf ) );
+
+    /* probe the cone of each output without marking; both probes always run */
+    bool const hit_first = multi_mark_visited_rec<false>( ntk.index_to_node( index1 ) );
+    bool const hit_second = multi_mark_visited_rec<false>( ntk.index_to_node( index2 ) );
+
+    /* restore the leaf values */
+    for ( auto const leaf : cut )
+      ntk.decr_value( ntk.index_to_node( leaf ) );
+
+    /* overlap: at least one cone reaches a node stamped with the current
+     * traversal ID */
+    bool const overlapping = hit_first || hit_second;
+
+    return overlapping;
+  }
+
+  inline bool multi_gate_check_incompatible( uint32_t index1, uint32_t index2, bool& is_new, uint32_t& data_index )
+  {
+    /* specialized code for 2 outputs: both nodes must carry the same assignment */
+    uint32_t const bucket = node_tuple_match[index1];
+    if ( bucket != node_tuple_match[index2] )
+      return true;
+
+    /* UINT32_MAX: nothing assigned yet, the output arguments stay untouched */
+    if ( bucket == UINT32_MAX )
+      return false;
+
+    /* hand the existing assignment back to the caller */
+    data_index = bucket;
+    is_new = false;
+    return false;
+  }
+
+  inline void multi_gate_mark_compatibility( uint32_t index1, uint32_t index2, uint32_t mark_value )
+  {
+    /* tag both output nodes with the same value */
+    node_tuple_match[index1] = node_tuple_match[index2] = mark_value;
+  }
+
+  inline void multi_gate_mark_visited( uint32_t index1, uint32_t index2, multi_cut_t const& cut )
+  {
+    /* The cut leaves are flagged through their value while marking:
+     * multi_mark_visited_rec does not traverse nodes with a nonzero value. */
+    for ( auto const leaf : cut )
+      ntk.incr_value( ntk.index_to_node( leaf ) );
+
+    /* stamp the cone of each output with the current traversal ID,
+     * first output first */
+    node<Ntk> const first_root = ntk.index_to_node( index1 );
+    node<Ntk> const second_root = ntk.index_to_node( index2 );
+    multi_mark_visited_rec<true>( first_root );
+    multi_mark_visited_rec<true>( second_root );
+
+    /* restore the leaf values */
+    for ( auto const leaf : cut )
+      ntk.decr_value( ntk.index_to_node( leaf ) );
+  }
+
+  template<bool MARK> /* MARK = true stamps the traversed nodes with trav_id(); MARK = false only probes */
+  bool multi_mark_visited_rec( node<Ntk> const& n ) /* true iff a node stamped with trav_id() is reached from n without crossing a node with nonzero value */
+  {
+    /* leaf: callers flag the cut leaves with a nonzero value */
+    if ( ntk.value( n ) )
+      return false;
+
+    /* already visited */
+    if ( ntk.visited( n ) == ntk.trav_id() )
+      return true;
+
+    if constexpr ( MARK )
+    {
+      ntk.set_visited( n, ntk.trav_id() );
+    }
+
+    bool contained = false;
+    ntk.foreach_fanin( n, [&]( auto const& f ) {
+      contained |= multi_mark_visited_rec<MARK>( ntk.get_node( f ) );
+
+      if constexpr ( !MARK ) /* when probing, stop at the first hit; when marking, always visit every fanin */
+      {
+        if ( contained )
+          return false;
+      }
+
+      return true;
+    } );
+
+    return contained;
+  }
+
+  bool is_contained_mffc( node<Ntk> root, node<Ntk> n, multi_cut_t const& cut ) /* false if n has no fanout left once the cone of root (bounded by cut) is dereferenced, true otherwise */
+  {
+    /* reference cut leaves: dereference_node_rec stops at nodes with a nonzero value */
+    for ( auto leaf : cut )
+    {
+      ntk.incr_value( ntk.index_to_node( leaf ) );
+    }
+
+    bool valid = true;
+    tmp_visited.clear();
+    dereference_node_rec( root ); /* temporarily decrements fanout counts below root and logs them in tmp_visited */
+
+    if ( ntk.fanout_size( n ) == 0 ) /* every fanout of n was removed by the dereferencing */
+      valid = false;
+
+    for ( uint64_t g : tmp_visited ) /* undo every logged decrement */
+      ntk.incr_fanout_size( ntk.index_to_node( g ) );
+
+    /* dereference leaves */
+    for ( auto leaf : cut )
+    {
+      ntk.decr_value( ntk.index_to_node( leaf ) );
+    }
+
+    return valid;
+  }
+
+  void dereference_node_rec( node<Ntk> const& n )
+  {
+    /* nodes with a nonzero value are leaves: stop here */
+    if ( ntk.value( n ) != 0 )
+      return;
+
+    ntk.foreach_fanin( n, [&]( auto const& fanin ) {
+      node<Ntk> const child = ntk.get_node( fanin );
+      bool const dangling = ( ntk.decr_fanout_size( child ) == 0 );
+      if ( dangling )
+        dereference_node_rec( child );
+      /* log every decrement in tmp_visited */
+      tmp_visited.push_back( ntk.node_to_index( child ) );
+    } );
+  }
+
+  void multi_add_choices( choice_view<Ntk>& choice_ntk ) /* links the two outputs of every matched gate: as a choice, or as a TFI dependency when one output lies in the cone of the other */
+  {
+    for ( auto& field : multi_node_match )
+    {
+      auto& pair = field.front(); /* the first match of the bucket provides the two output nodes */
+      uint32_t index1 = pair[0].node_index;
+      uint32_t index2 = pair[1].node_index;
+      uint32_t cut_index1 = pair[0].cut_index;
+      cut_t const& cut = multi_cut_set[cut_index1][0];
+
+      /* don't add choice if in TFI, set TFI bit */
+      if ( multi_is_in_tfi( ntk.index_to_node( index2 ), ntk.index_to_node( index1 ), cut ) )
+      {
+        /* in_tfi is 0 when node 1 is a direct fanin of node 2, and 1 otherwise */
+        uint32_t in_tfi = multi_is_in_direct_tfi( ntk.index_to_node( index2 ), ntk.index_to_node( index1 ) ) ? 0 : 1;
+        for ( auto& match : field )
+          match[0].in_tfi = in_tfi;
+        /* add a TFI dependency: the value of node 1 stores the index of node 2 (read by multi_topo_sort_rec) */
+        ntk.set_value( ntk.index_to_node( index1 ), index2 );
+        // multi_set_tfi_dependency( ntk.index_to_node( index2 ), ntk.index_to_node( index1 ), cut );
+        continue;
+      }
+
+      choice_ntk.add_choice( ntk.index_to_node( index1 ), ntk.index_to_node( index2 ) );
+
+      assert( choice_ntk.count_choices( ntk.index_to_node( index1 ) ) == 2 );
+    }
+  }
+
+  bool multi_topo_sort_rec( choice_view<Ntk>& choice_ntk, node<Ntk> const& n ) /* returns false when a cycle is detected so that the caller can backtrack, true otherwise */
+  {
+    /* is permanently marked? (permanent mark: trav_id(), temporary mark: trav_id() - 1) */
+    if ( ntk.visited( n ) == ntk.trav_id() )
+      return true;
+
+    /* loop detected: backtrack to remove the cause */
+    if ( ntk.visited( n ) == ntk.trav_id() - 1 )
+      return false;
+
+    /* get the representative (smallest index) */
+    node<Ntk> repr = choice_ntk.get_choice_representative( n );
+
+    /* loop detected: backtrack to remove the cause */
+    if ( ntk.visited( repr ) == ntk.trav_id() - 1 )
+      return false;
+
+    /* solve the TFI dependency first: the value of n holds the index of the node it depends on (0 = none) */
+    node<Ntk> dependency_node = ntk.index_to_node( ntk.value( n ) );
+    if ( dependency_node > 0 && ntk.visited( dependency_node ) != ntk.trav_id() - 1 )
+    {
+      if ( !multi_topo_sort_rec( choice_ntk, dependency_node ) )
+        return false;
+      assert( ntk.visited( n ) == ntk.trav_id() );
+      return true;
+    }
+
+    /* for all the choices */
+    uint32_t i = 0; /* position of the current choice in the iteration */
+    bool check = true;
+    choice_ntk.foreach_choice( repr, [&]( auto const& g ) {
+      /* ensure that the node is not visited or temporarily marked */
+      assert( ntk.visited( g ) != ntk.trav_id() );
+      assert( ntk.visited( g ) != ntk.trav_id() - 1 );
+
+      /* mark node temporarily */
+      ntk.set_visited( g, ntk.trav_id() - 1 );
+
+      /* mark children; the iteration stops at the first fanin that reports a cycle */
+      ntk.foreach_fanin( g, [&]( auto const& f ) {
+        check = multi_topo_sort_rec( choice_ntk, ntk.get_node( f ) );
+        return check;
+      } );
+
+      /* cycle detected: backtrack to the last choice jump */
+      if ( !check )
+      {
+        /* revert visited */
+        ntk.set_visited( g, ntk.trav_id() - 2 );
+        if ( i > 0 && n == repr )
+        {
+          /* fix cycle: remove multi-output match; TODO: extend for more than 2 outputs */
+          node_tuple_match[ntk.node_to_index( g )] = UINT32_MAX;
+          choice_ntk.remove_choice( g );
+          check = true;
+        }
+        return false;
+      }
+
+      ++i;
+      return true;
+    } );
+
+    if ( !check )
+    {
+      return false;
+    }
+
+    choice_ntk.foreach_choice( repr, [&]( auto const& g ) {
+      /* ensure that the node is not visited */
+      assert( ntk.visited( g ) != ntk.trav_id() );
+
+      /* mark node g permanently */
+      ntk.set_visited( g, ntk.trav_id() );
+
+      /* visit node */
+      topo_order.push_back( g );
+
+      return true;
+    } );
+
+    return true;
+  }
+
+  inline bool multi_is_in_tfi( node<Ntk> const& root, node<Ntk> const& n, cut_t const& cut )
+  {
+    /* flag the cut leaves so that the traversal stops at them */
+    for ( auto const leaf : cut )
+      ntk.incr_value( ntk.index_to_node( leaf ) );
+
+    /* stamp the cone of root with a fresh traversal ID */
+    ntk.incr_trav_id();
+    multi_mark_visited_rec<true>( root );
+
+    /* n belongs to the cone iff it carries the fresh stamp */
+    auto const stamp = ntk.trav_id();
+    bool const reached = ( ntk.visited( n ) == stamp );
+
+    /* restore the leaf values */
+    for ( auto const leaf : cut )
+      ntk.decr_value( ntk.index_to_node( leaf ) );
+
+    return reached;
+  }
+
+  inline bool multi_is_in_direct_tfi( node<Ntk> const& root, node<Ntk> const& n )
+  {
+    /* scan every fanin of root and remember whether one of them is n */
+    bool is_fanin = false;
+
+    ntk.foreach_fanin( root, [&]( auto const& fanin ) {
+      is_fanin |= ( ntk.get_node( fanin ) == n );
+    } );
+
+    return is_fanin;
+  }
+
+  inline void multi_set_tfi_dependency( node<Ntk> const& root, node<Ntk> const& n, cut_t const& cut )
+  {
+    /* flag the cut leaves: the recursion below stops at nodes with a
+     * nonzero value */
+    for ( auto const leaf : cut )
+      ntk.incr_value( ntk.index_to_node( leaf ) );
+
+    ntk.incr_trav_id();
+    uint32_t const root_index = ntk.node_to_index( root );
+
+    /* n depends on root; n is stamped first so that the recursion skips it */
+    ntk.set_value( n, root_index );
+    ntk.set_visited( n, ntk.trav_id() );
+
+    /* store the same dependency in the nodes reached from root */
+    multi_set_tfi_dependency_rec( root, root_index );
+
+    /* reset root's dependency info */
+    ntk.set_value( root, 0 );
+
+    /* restore the leaf values */
+    for ( auto const leaf : cut )
+      ntk.decr_value( ntk.index_to_node( leaf ) );
+  }
+
+  void multi_set_tfi_dependency_rec( node<Ntk> const& n, uint32_t const dependency_info )
+  {
+    /* stop at the leaves (nonzero value) and at the nodes that already
+     * carry the current traversal ID */
+    bool const is_leaf = ( ntk.value( n ) != 0 );
+    if ( is_leaf || ntk.visited( n ) == ntk.trav_id() )
+      return;
+
+    /* stamp the node and record the dependency */
+    ntk.set_value( n, dependency_info );
+    ntk.set_visited( n, ntk.trav_id() );
+
+    /* recur on the fanins */
+    ntk.foreach_fanin( n, [&]( auto const& fanin ) {
+      multi_set_tfi_dependency_rec( ntk.get_node( fanin ), dependency_info );
+    } );
+  }
+#pragma endregion
+
+private:
+  Ntk const& ntk;
+  tech_library<NInputs, Configuration> const& library;
+  emap_params const& ps;
+  emap_stats& st;
+
+  uint32_t iteration{ 0 }; /* current mapping iteration */
+  double delay{ 0.0f };    /* current delay of the mapping */
+  double area{ 0.0f };     /* current area of the mapping */
+  uint32_t inv{ 0 };       /* current inverter count */
+
+  /* lib inverter info */
+  float lib_inv_area;
+  float lib_inv_delay;
+  uint32_t lib_inv_id;
+
+  /* lib buffer info */
+  float lib_buf_area;
+  float lib_buf_delay;
+  uint32_t lib_buf_id;
+
+  std::vector<node<Ntk>> topo_order; /* nodes in the order computed by multi_init_topo_order */
+  node_match_t node_match;
+  std::vector<uint32_t> node_tuple_match; /* per node: index into multi_node_match, UINT32_MAX when unassigned */
+  std::vector<float> switch_activity;
+  std::vector<uint64_t> tmp_visited; /* scratch list of node indices logged by dereference_node_rec */
+
+  /* cut computation */
+  std::vector<cut_set_t> cuts; /* compressed representation of cuts */
+  cut_merge_t lcuts;           /* cut merger container */
+  truth_compute_t ltruth;      /* truth table merger container */
+  support_t lsupport;          /* support merger container */
+  uint32_t cuts_total{ 0 };    /* current computed cuts */
+
+  /* multi-output matching */
+  multi_cut_set_t multi_cut_set;    /* set of multi-output cuts */
+  multi_matches_t multi_node_match; /* matched multi-output gates */
+
+  time_point time_begin;
+};
+
+} /* namespace detail */
+
+/*! \brief Technology mapping.
+ *
+ * This function implements a technology mapping algorithm.
+ *
+ * The function takes the size of the cuts in the template parameter `CutSize`.
+ *
+ * The function returns a k-LUT network. Each LUT abstracts a gate of the technology library.
+ *
+ * **Required network functions:**
+ * - `size`
+ * - `is_pi`
+ * - `is_constant`
+ * - `node_to_index`
+ * - `index_to_node`
+ * - `get_node`
+ * - `foreach_po`
+ * - `foreach_node`
+ * - `fanout_size`
+ *
+ * \param ntk Network
+ * \param library Technology library
+ * \param ps Mapping params
+ * \param pst Mapping statistics
+ *
+ */
+template<unsigned CutSize = 6u, class Ntk, unsigned NInputs, classification_type Configuration>
+binding_view<klut_network> emap( Ntk const& ntk, tech_library<NInputs, Configuration> const& library, emap_params const& ps = {}, emap_stats* pst = nullptr )
+{
+  static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+  static_assert( has_size_v<Ntk>, "Ntk does not implement the size method" );
+  static_assert( has_is_pi_v<Ntk>, "Ntk does not implement the is_pi method" );
+  static_assert( has_is_constant_v<Ntk>, "Ntk does not implement the is_constant method" );
+  static_assert( has_node_to_index_v<Ntk>, "Ntk does not implement the node_to_index method" );
+  static_assert( has_index_to_node_v<Ntk>, "Ntk does not implement the index_to_node method" );
+  static_assert( has_get_node_v<Ntk>, "Ntk does not implement the get_node method" );
+  static_assert( has_foreach_po_v<Ntk>, "Ntk does not implement the foreach_po method" );
+  static_assert( has_foreach_node_v<Ntk>, "Ntk does not implement the foreach_node method" );
+  static_assert( has_fanout_size_v<Ntk>, "Ntk does not implement the fanout_size method" );
+
+  /* the mapper gathers its statistics in a local object */
+  emap_stats stats;
+  detail::emap_impl<Ntk, CutSize, NInputs, Configuration> mapper( ntk, library, ps, stats );
+  auto mapped = mapper.run();
+
+  /* the report is printed only in verbose mode and if no mapping error is flagged */
+  bool const print_report = ps.verbose && !stats.mapping_error;
+  if ( print_report )
+    stats.report();
+
+  /* copy the statistics to the caller if requested */
+  if ( pst != nullptr )
+    *pst = stats;
+  return mapped;
+}
+
+/*! \brief Technology mapping.
+ *
+ * This function implements a technology mapping algorithm.
+ *
+ * The function takes the size of the cuts in the template parameter `CutSize`.
+ *
+ * The function returns a block network that supports multi-output cells.
+ *
+ * **Required network functions:**
+ * - `size`
+ * - `is_pi`
+ * - `is_constant`
+ * - `node_to_index`
+ * - `index_to_node`
+ * - `get_node`
+ * - `foreach_po`
+ * - `foreach_node`
+ * - `fanout_size`
+ *
+ * \param ntk Network
+ * \param library Technology library
+ * \param ps Mapping params
+ * \param pst Mapping statistics
+ *
+ */
+template<unsigned CutSize = 6u, class Ntk, unsigned NInputs, classification_type Configuration>
+cell_view<block_network> emap_block( Ntk const& ntk, tech_library<NInputs, Configuration> const& library, emap_params const& ps = {}, emap_stats* pst = nullptr )
+{
+  static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+  static_assert( has_size_v<Ntk>, "Ntk does not implement the size method" );
+  static_assert( has_is_pi_v<Ntk>, "Ntk does not implement the is_pi method" );
+  static_assert( has_is_constant_v<Ntk>, "Ntk does not implement the is_constant method" );
+  static_assert( has_node_to_index_v<Ntk>, "Ntk does not implement the node_to_index method" );
+  static_assert( has_index_to_node_v<Ntk>, "Ntk does not implement the index_to_node method" );
+  static_assert( has_get_node_v<Ntk>, "Ntk does not implement the get_node method" );
+  static_assert( has_foreach_po_v<Ntk>, "Ntk does not implement the foreach_po method" );
+  static_assert( has_foreach_node_v<Ntk>, "Ntk does not implement the foreach_node method" );
+  static_assert( has_fanout_size_v<Ntk>, "Ntk does not implement the fanout_size method" );
+
+  /* statistics are collected locally and exported at the end */
+  emap_stats stats;
+  detail::emap_impl<Ntk, CutSize, NInputs, Configuration> mapper( ntk, library, ps, stats );
+  auto block_ntk = mapper.run_block();
+
+  /* no report unless verbose mode is on and no mapping error is flagged */
+  bool const print_report = ps.verbose && !stats.mapping_error;
+  if ( print_report )
+    stats.report();
+
+  /* export the statistics when the caller provided a destination */
+  if ( pst != nullptr )
+    *pst = stats;
+  return block_ntk;
+}
+
+/*! \brief Technology node mapping.
+ *
+ * This function implements a simple technology mapping algorithm.
+ * The algorithm maps each node to the best implementation in the technology library.
+ *
+ * **Required network functions:**
+ * - `size`
+ * - `is_pi`
+ * - `is_constant`
+ * - `node_to_index`
+ * - `index_to_node`
+ * - `get_node`
+ * - `foreach_po`
+ * - `foreach_node`
+ * - `fanout_size`
+ * - `has_binding`
+ *
+ * \param ntk Network
+ * \param library Technology library
+ * \param ps Mapping params
+ * \param pst Mapping statistics
+ *
+ */
+template<unsigned CutSize = 6u, class Ntk, unsigned NInputs, classification_type Configuration>
+binding_view<klut_network> emap_node_map( Ntk const& ntk, tech_library<NInputs, Configuration> const& library, emap_params const& ps = {}, emap_stats* pst = nullptr )
+{
+  static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+  static_assert( has_size_v<Ntk>, "Ntk does not implement the size method" );
+  static_assert( has_is_pi_v<Ntk>, "Ntk does not implement the is_pi method" );
+  static_assert( has_is_constant_v<Ntk>, "Ntk does not implement the is_constant method" );
+  static_assert( has_node_to_index_v<Ntk>, "Ntk does not implement the node_to_index method" );
+  static_assert( has_index_to_node_v<Ntk>, "Ntk does not implement the index_to_node method" );
+  static_assert( has_get_node_v<Ntk>, "Ntk does not implement the get_node method" );
+  static_assert( has_foreach_po_v<Ntk>, "Ntk does not implement the foreach_po method" );
+  static_assert( has_foreach_node_v<Ntk>, "Ntk does not implement the foreach_node method" );
+  static_assert( has_has_binding_v<Ntk>, "Ntk does not implement the has_binding method" );
+
+  /* statistics are collected locally and exported at the end */
+  emap_stats stats;
+  detail::emap_impl<Ntk, CutSize, NInputs, Configuration> mapper( ntk, library, ps, stats );
+  auto node_mapped = mapper.run_node_map();
+
+  /* no report unless verbose mode is on and no mapping error is flagged */
+  bool const print_report = ps.verbose && !stats.mapping_error;
+  if ( print_report )
+    stats.report();
+
+  /* export the statistics when the caller provided a destination */
+  if ( pst != nullptr )
+    *pst = stats;
+  return node_mapped;
+}
+
+/*! \brief Technology node mapping.
+ *
+ * This function implements a simple technology mapping algorithm.
+ * The algorithm maps each node to a library gate with the same function; if several gates share that function, the one listed last in the library is used.
+ *
+ * The input must be a binding_view with the gates correctly loaded.
+ *
+ * **Required network functions:**
+ * - `size`
+ * - `is_pi`
+ * - `is_constant`
+ * - `node_to_index`
+ * - `index_to_node`
+ * - `get_node`
+ * - `foreach_po`
+ * - `foreach_node`
+ * - `fanout_size`
+ * - `has_binding`
+ *
+ * \param ntk Network
+ *
+ */
+template<class Ntk>
+void emap_load_mapping( Ntk& ntk )
+{
+  static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+  static_assert( has_size_v<Ntk>, "Ntk does not implement the size method" );
+  static_assert( has_is_pi_v<Ntk>, "Ntk does not implement the is_pi method" );
+  static_assert( has_is_constant_v<Ntk>, "Ntk does not implement the is_constant method" );
+  static_assert( has_node_to_index_v<Ntk>, "Ntk does not implement the node_to_index method" );
+  static_assert( has_index_to_node_v<Ntk>, "Ntk does not implement the index_to_node method" );
+  static_assert( has_get_node_v<Ntk>, "Ntk does not implement the get_node method" );
+  static_assert( has_foreach_po_v<Ntk>, "Ntk does not implement the foreach_po method" );
+  static_assert( has_foreach_node_v<Ntk>, "Ntk does not implement the foreach_node method" );
+  static_assert( has_has_binding_v<Ntk>, "Ntk does not implement the has_binding method" );
+
+  /* index the library gates by function; when several gates share a
+   * function, the gate visited last overwrites the previous ones */
+  using function_map_t = std::unordered_map<kitty::dynamic_truth_table, uint32_t, kitty::hash<kitty::dynamic_truth_table>>;
+  function_map_t gate_of_function;
+  for ( auto const& gate : ntk.get_library() )
+    gate_of_function[gate.function] = gate.id;
+
+  /* bind each gate node to the library gate implementing its function;
+   * nodes without a matching gate are reported and left without binding */
+  ntk.foreach_gate( [&]( auto const& n ) {
+    auto const match = gate_of_function.find( ntk.node_function( n ) );
+    if ( match == gate_of_function.end() )
+    {
+      std::cout << fmt::format( "[e] node mapping for node {} failed: no match in the tech library\n", ntk.node_to_index( n ) );
+      return;
+    }
+
+    ntk.add_binding( n, match->second );
+  } );
+}
+
+} /* namespace mockturtle */
\ No newline at end of file
diff --git a/include/mockturtle/algorithms/extract_adders.hpp b/include/mockturtle/algorithms/extract_adders.hpp
new file mode 100644
index 000000000..070af555f
--- /dev/null
+++ b/include/mockturtle/algorithms/extract_adders.hpp
@@ -0,0 +1,971 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2023  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file extract_adders.hpp
+  \brief Maps adders in the network
+
+  \author Alessandro Tempia Calvino
+*/
+
+#include <algorithm>
+#include <array>
+#include <vector>
+
+#include <fmt/format.h>
+#include <kitty/dynamic_truth_table.hpp>
+#include <kitty/static_truth_table.hpp>
+#include <parallel_hashmap/phmap.h>
+
+#include "../networks/block.hpp"
+#include "../networks/storage.hpp"
+#include "../utils/node_map.hpp"
+#include "../utils/stopwatch.hpp"
+#include "../views/choice_view.hpp"
+#include "cut_enumeration.hpp"
+
+namespace mockturtle
+{
+
+struct extract_adders_params
+{
+  extract_adders_params()
+  {
+    cut_enumeration_ps.cut_limit = 49;
+    cut_enumeration_ps.minimize_truth_table = false;
+  }
+
+  /*! \brief Parameters for cut enumeration
+   *
+   * The constructor sets the cut limit to 49 and
+   * disables truth table minimization.
+   */
+  cut_enumeration_params cut_enumeration_ps{};
+
+  /*! \brief Map to inverted adder blocks (NAND2-XNOR2, MIN3-XNOR3) */
+  bool map_inverted{ false };
+
+  /*! \brief Validate adder candidates using the MFFC instead of a TFI traversal */
+  bool use_mffc_filter{ true };
+
+  /*! \brief Be verbose (print the statistics report after extraction) */
+  bool verbose{ false };
+};
+
+struct extract_adders_stats
+{
+  /*! \brief Total number of enumerated cuts. */
+  uint32_t cuts_total{ 0 };
+
+  /*! \brief Number of cuts matching each gate function. */
+  uint32_t and2{ 0 };
+  uint32_t maj3{ 0 };
+  uint32_t xor2{ 0 };
+  uint32_t xor3{ 0 };
+
+  /*! \brief Number of distinct leaf sets in the cut hash table. */
+  uint32_t num_classes{ 0 };
+
+  /*! \brief Number of selected half adders (HA) and full adders (FA). */
+  uint32_t mapped_ha{ 0 };
+  uint32_t mapped_fa{ 0 };
+
+  /*! \brief Total runtime. */
+  stopwatch<>::duration time_total{ 0 };
+
+  void report() const
+  {
+    std::cout << fmt::format( "[i] Cuts = {}\t And2 = {}\t Xor2 = {}\t Maj3 = {}\t Xor3 = {}\n",
+                              cuts_total, and2, xor2, maj3, xor3 );
+    std::cout << fmt::format( "[i] Classes = {} \tMapped HA = {}\t Mapped FA:{}\n", num_classes, mapped_ha, mapped_fa );
+    std::cout << fmt::format( "[i] Total runtime = {:>5.2f} secs\n", to_seconds( time_total ) );
+  }
+};
+
+namespace detail
+{
+
+struct triple_hash
+{
+  uint64_t operator()( const std::array<uint32_t, 3>& p ) const
+  {
+    uint64_t seed = hash_block( p[0] );
+
+    hash_combine( seed, hash_block( p[1] ) );
+    hash_combine( seed, hash_block( p[2] ) );
+
+    return seed;
+  }
+};
+
+struct cut_enumeration_fa_cut
+{
+  /* stats */
+  bool is_xor{ false };
+};
+
+template<class Ntk>
+class extract_adders_impl
+{
+public:
+  using network_cuts_t = fast_network_cuts<Ntk, 3, true, cut_enumeration_fa_cut>;
+  using cut_t = typename network_cuts_t::cut_t;
+  using leaves_hash_t = phmap::flat_hash_map<std::array<uint32_t, 3>, std::vector<uint64_t>, triple_hash>;
+  using match_pair_t = std::pair<uint64_t, uint64_t>;
+  using matches_t = std::vector<match_pair_t>;
+  using block_map = node_map<signal<block_network>, Ntk>;
+
+public:
+  explicit extract_adders_impl( Ntk& ntk, extract_adders_params const& ps, extract_adders_stats& st )
+      : ntk( ntk ),
+        ps( ps ),
+        st( st ),
+        cuts( fast_cut_enumeration<Ntk, 3, true, cut_enumeration_fa_cut>( ntk, ps.cut_enumeration_ps ) ),
+        cuts_classes(),
+        half_adders(),
+        full_adders(),
+        node_match( ntk.size(), UINT32_MAX )
+  {
+    cuts_classes.reserve( 2000 );
+    tmp_visited.reserve( 20 );
+  }
+
+  block_network run()
+  {
+    stopwatch t( st.time_total );
+
+    auto [res, old2new] = initialize_map_network();
+    create_classes();
+    match_adders();
+    map();
+    topo_sort();
+    finalize( res, old2new );
+
+    return res;
+  }
+
+private:
+  void create_classes()
+  {
+    uint32_t counter = 0;
+    std::array<uint32_t, 3> leaves = { 0, 0, 0 };
+
+    st.cuts_total = cuts.total_cuts();
+
+    ntk.foreach_gate( [&]( auto const& n ) {
+      uint32_t cut_index = 0;
+      for ( auto& cut : cuts.cuts( ntk.node_to_index( n ) ) )
+      {
+        kitty::static_truth_table<3> tt = cuts.truth_table( *cut );
+
+        bool to_add = false;
+        if ( cut->size() == 2 )
+        {
+          /* check for and2 */
+          for ( uint32_t func : and2func )
+          {
+            if ( tt._bits == func )
+            {
+              ++st.and2;
+              to_add = true;
+              break;
+            }
+          }
+
+          /* check for xor2 */
+          for ( uint32_t func : xor2func )
+          {
+            if ( tt._bits == func )
+            {
+              ++st.xor2;
+              ( *cut )->data.is_xor = true;
+              to_add = true;
+              break;
+            }
+          }
+        }
+        else if ( cut->size() == 3 )
+        {
+          /* check for maj3 */
+          for ( uint32_t func : maj3func )
+          {
+            if ( tt._bits == func )
+            {
+              ++st.maj3;
+              to_add = true;
+              break;
+            }
+          }
+
+          /* check xor3 */
+          for ( uint32_t func : xor3func )
+          {
+            if ( tt._bits == func )
+            {
+              ++st.xor3;
+              ( *cut )->data.is_xor = true;
+              to_add = true;
+              break;
+            }
+          }
+        }
+
+        if ( !to_add )
+        {
+          ++cut_index;
+          continue;
+        }
+
+        uint64_t data = ( static_cast<uint64_t>( ntk.node_to_index( n ) ) << 16 ) | cut_index;
+        leaves[2] = 0;
+        uint32_t i = 0;
+        for ( auto l : *cut )
+          leaves[i++] = l;
+
+        /* add to hash table */
+        auto& v = cuts_classes[leaves];
+        v.push_back( data );
+
+        ++cut_index;
+      }
+    } );
+
+    st.num_classes = cuts_classes.size();
+  }
+
+  void match_adder2( typename leaves_hash_t::value_type const& it ) /* pairs the 2-input cuts of one leaf class into half-adder candidates; taking the map's own value_type avoids copying the entry */
+  {
+    for ( uint32_t i = 0; i + 1 < it.second.size(); ++i ) /* `i + 1 <` cannot underflow when the class is empty */
+    {
+      uint64_t data_i = it.second[i]; /* packed entry: ( node index << 16 ) | cut index */
+      uint32_t index_i = data_i >> 16;
+      uint32_t cut_index_i = data_i & UINT16_MAX;
+      auto const& cut_i = cuts.cuts( index_i )[cut_index_i];
+
+      /* TODO: find unique matches */
+      for ( uint32_t j = i + 1; j < it.second.size(); ++j )
+      {
+        uint64_t data_j = it.second[j];
+        uint32_t index_j = data_j >> 16;
+        uint32_t cut_index_j = data_j & UINT16_MAX;
+        auto const& cut_j = cuts.cuts( index_j )[cut_index_j];
+
+        /* not compatible: a half adder needs exactly one XOR2 and one AND2 */
+        if ( cut_i->data.is_xor == cut_j->data.is_xor )
+          continue;
+
+        /* check compatibility */
+        if ( !check_adder( index_i, index_j, cut_i ) )
+          continue;
+
+        assert( cut_i.size() == 2 );
+        assert( cut_j.size() == 2 );
+
+        half_adders.push_back( { data_i, data_j } );
+      }
+    }
+  }
+
+  void match_adders()
+  {
+    half_adders.reserve( cuts_classes.size() );
+    full_adders.reserve( cuts_classes.size() );
+    ntk.clear_values();
+
+    for ( auto& it : cuts_classes )
+    {
+      /* not matched */
+      if ( it.second.size() < 2 )
+        continue;
+
+      /* half adder */
+      if ( it.first[2] == 0 )
+      {
+        match_adder2( it );
+        continue;
+      }
+
+      for ( uint32_t i = 0; i < it.second.size() - 1; ++i )
+      {
+        uint64_t data_i = it.second[i];
+        uint32_t index_i = data_i >> 16;
+        uint32_t cut_index_i = data_i & UINT16_MAX;
+        auto const& cut_i = cuts.cuts( index_i )[cut_index_i];
+
+        /* TODO: find unique matches */
+        for ( uint32_t j = i + 1; j < it.second.size(); ++j )
+        {
+          uint64_t data_j = it.second[j];
+          uint32_t index_j = data_j >> 16;
+          uint32_t cut_index_j = data_j & UINT16_MAX;
+          auto const& cut_j = cuts.cuts( index_j )[cut_index_j];
+
+          /* not compatible */
+          if ( cut_i->data.is_xor == cut_j->data.is_xor )
+            continue;
+
+          /* check compatibility */
+          if ( !check_adder( index_i, index_j, cut_i ) )
+            continue;
+
+          assert( cut_i.size() == 3 );
+          assert( cut_j.size() == 3 );
+
+          full_adders.push_back( { data_i, data_j } );
+        }
+      }
+    }
+  }
+
+  void map()
+  {
+    selected.reserve( full_adders.size() + half_adders.size() );
+
+    ntk.incr_trav_id();
+
+    for ( uint32_t i = 0; i < full_adders.size(); ++i ) /* full adders are tried before half adders */
+    {
+      auto& pair = full_adders[i];
+      uint32_t index1 = pair.first >> 16;
+      uint32_t index2 = pair.second >> 16;
+      uint32_t cut_index1 = pair.first & UINT16_MAX;
+      cut_t const& cut = cuts.cuts( index1 )[cut_index1];
+
+      /* remove overlapping multi-output gates */
+      if ( !gate_mark( index1, index2, cut ) )
+        continue;
+
+      selected.push_back( 2 * i ); /* even selection id: full adder i */
+      node_match[std::max( index1, index2 )] = 2 * i; /* the larger node index stores the selection id */
+      node_match[std::min( index1, index2 )] = UINT32_MAX - 1; /* the other output only gets a sentinel */
+
+      ++st.mapped_fa;
+    }
+
+    for ( uint32_t i = 0; i < half_adders.size(); ++i )
+    {
+      auto& pair = half_adders[i];
+      uint32_t index1 = pair.first >> 16;
+      uint32_t index2 = pair.second >> 16;
+      uint32_t cut_index1 = pair.first & UINT16_MAX;
+      cut_t const& cut = cuts.cuts( index1 )[cut_index1];
+
+      if ( !gate_mark( index1, index2, cut ) ) /* skip candidates overlapping an already selected adder */
+        continue;
+
+      selected.push_back( 2 * i + 1 ); /* odd selection id: half adder i */
+      node_match[std::max( index1, index2 )] = 2 * i + 1;
+      node_match[std::min( index1, index2 )] = UINT32_MAX - 1;
+
+      ++st.mapped_ha;
+    }
+  }
+
+  void topo_sort()
+  {
+    topo_order.reserve( ntk.size() );
+
+    /* add map choices */
+    choice_view<Ntk> choice_ntk{ ntk };
+    add_choices( choice_ntk );
+
+    ntk.incr_trav_id();
+    ntk.incr_trav_id();
+
+    /* add constants and CIs */
+    const auto c0 = ntk.get_node( ntk.get_constant( false ) );
+    ntk.set_visited( c0, ntk.trav_id() );
+
+    if ( const auto c1 = ntk.get_node( ntk.get_constant( true ) ); ntk.visited( c1 ) != ntk.trav_id() )
+    {
+      ntk.set_visited( c1, ntk.trav_id() );
+    }
+
+    ntk.foreach_ci( [&]( auto const& n ) {
+      if ( ntk.visited( n ) != ntk.trav_id() )
+      {
+        ntk.set_visited( n, ntk.trav_id() );
+      }
+    } );
+
+    /* sort topologically */
+    ntk.foreach_co( [&]( auto const& f ) {
+      if ( ntk.visited( ntk.get_node( f ) ) == ntk.trav_id() )
+        return;
+      topo_sort_rec( choice_ntk, ntk.get_node( f ) );
+    } );
+  }
+
+  void add_choices( choice_view<Ntk>& choice_ntk )
+  {
+    for ( uint32_t index : selected )
+    {
+      auto& pair = ( index & 1 ) ? half_adders[index >> 1] : full_adders[index >> 1];
+      uint32_t index1 = pair.first >> 16;
+      uint32_t index2 = pair.second >> 16;
+
+      if ( index1 > index2 )
+        std::swap( index1, index2 );
+
+      choice_ntk.add_choice( ntk.index_to_node( index1 ), ntk.index_to_node( index2 ) );
+
+      assert( choice_ntk.count_choices( ntk.index_to_node( index1 ) ) == 2 );
+    }
+  }
+
+  inline bool check_adder( uint32_t index1, uint32_t index2, cut_t const& cut )
+  {
+    bool valid = true;
+
+    /* check containment of cut1 in cut2 and viceversa */
+    if ( index1 > index2 )
+    {
+      std::swap( index1, index2 );
+    }
+
+    ntk.foreach_fanin( ntk.index_to_node( index2 ), [&]( auto const& f ) {
+      auto g = ntk.get_node( f );
+      if ( ntk.node_to_index( g ) == index1 && ntk.fanout_size( g ) == 1 )
+      {
+        valid = false;
+      }
+      return valid;
+    } );
+
+    if ( !valid )
+      return false;
+
+    /* check containment when node is reachable from middle nodes with multiple fanouts */
+    return check_adder_tfi_valid( ntk.index_to_node( index2 ), ntk.index_to_node( index1 ), cut );
+  }
+
+  inline bool gate_mark( uint32_t index1, uint32_t index2, cut_t const& cut )
+  {
+    bool contained = false;
+
+    /* mark leaves */
+    for ( auto leaf : cut )
+    {
+      ntk.incr_value( ntk.index_to_node( leaf ) );
+    }
+
+    contained = mark_visited_rec<false>( ntk.index_to_node( index1 ) );
+    contained |= mark_visited_rec<false>( ntk.index_to_node( index2 ) );
+
+    if ( contained )
+    {
+      /* unmark leaves */
+      for ( auto leaf : cut )
+      {
+        ntk.decr_value( ntk.index_to_node( leaf ) );
+      }
+      return false;
+    }
+
+    /* mark*/
+    mark_visited_rec<true>( ntk.index_to_node( index1 ) );
+    mark_visited_rec<true>( ntk.index_to_node( index2 ) );
+
+    /* unmark leaves */
+    for ( auto leaf : cut )
+    {
+      ntk.decr_value( ntk.index_to_node( leaf ) );
+    }
+
+    return true;
+  }
+
+  template<bool MARK>
+  bool mark_visited_rec( node<Ntk> const& n )
+  {
+    /* leaf */
+    if ( ntk.value( n ) )
+      return false;
+
+    /* already visited */
+    if ( ntk.visited( n ) == ntk.trav_id() )
+      return true;
+
+    if constexpr ( MARK )
+    {
+      ntk.set_visited( n, ntk.trav_id() );
+    }
+
+    bool contained = false;
+    ntk.foreach_fanin( n, [&]( auto const& f ) {
+      contained |= mark_visited_rec<MARK>( ntk.get_node( f ) );
+
+      if constexpr ( !MARK )
+      {
+        if ( contained )
+          return false;
+      }
+
+      return true;
+    } );
+
+    return contained;
+  }
+
+  inline bool check_adder_tfi_valid( node<Ntk> const& root, node<Ntk> const& n, cut_t const& cut )
+  {
+    /* reference cut leaves */
+    for ( auto leaf : cut )
+    {
+      ntk.incr_value( ntk.index_to_node( leaf ) );
+    }
+
+    bool valid = true;
+    if ( ps.use_mffc_filter )
+    {
+      tmp_visited.clear();
+      dereference_node_rec( root );
+
+      if ( ntk.fanout_size( n ) == 0 )
+        valid = false;
+
+      for ( auto g : tmp_visited )
+        ntk.incr_fanout_size( g );
+    }
+    else
+    {
+      ntk.incr_trav_id();
+      check_adder_tfi_valid_rec( root, root, n, valid );
+    }
+
+    /* dereference leaves */
+    for ( auto leaf : cut )
+    {
+      ntk.decr_value( ntk.index_to_node( leaf ) );
+    }
+
+    return valid;
+  }
+
+  bool check_adder_tfi_valid_rec( node<Ntk> const& n, node<Ntk> const& root, node<Ntk> const& target, bool& valid )
+  {
+    /* leaf */
+    if ( ntk.value( n ) )
+      return false;
+
+    /* already visited */
+    if ( ntk.visited( n ) == ntk.trav_id() )
+      return false;
+
+    ntk.set_visited( n, ntk.trav_id() );
+
+    if ( n == target )
+      return true;
+
+    bool found = false;
+    ntk.foreach_fanin( n, [&]( auto const& f ) {
+      found |= check_adder_tfi_valid_rec( ntk.get_node( f ), root, target, valid );
+      return valid;
+    } );
+
+    if ( found && n != root && ntk.fanout_size( n ) > 1 )
+      valid = false;
+
+    return found;
+  }
+
+  void dereference_node_rec( node<Ntk> const& n )
+  {
+    /* leaf */
+    if ( ntk.value( n ) )
+      return;
+
+    ntk.foreach_fanin( n, [&]( auto const& f ) {
+      node<Ntk> g = ntk.get_node( f );
+      if ( ntk.decr_fanout_size( g ) == 0 )
+      {
+        dereference_node_rec( g );
+      }
+      tmp_visited.push_back( g );
+    } );
+  }
+
+  inline bool is_in_tfi( node<Ntk> const& root, node<Ntk> const& n, cut_t const& cut )
+  {
+    /* reference cut leaves */
+    for ( auto leaf : cut )
+    {
+      ntk.incr_value( ntk.index_to_node( leaf ) );
+    }
+
+    ntk.incr_trav_id();
+    mark_visited_rec<true>( root );
+    bool contained = ntk.visited( n ) == ntk.trav_id();
+
+    /* dereference leaves */
+    for ( auto leaf : cut )
+    {
+      ntk.decr_value( ntk.index_to_node( leaf ) );
+    }
+
+    return contained;
+  }
+
+  void topo_sort_rec( choice_view<Ntk>& choice_ntk, node<Ntk> const& n )
+  {
+    /* is permanently marked? */
+    if ( ntk.visited( n ) == ntk.trav_id() )
+      return;
+
+    /* get the representative (smallest index) */
+    node<Ntk> repr = choice_ntk.get_choice_representative( n );
+
+    /* multioutput gate */
+    if ( choice_ntk.count_choices( repr ) > 1 )
+    {
+      /* get the cut */
+      uint32_t max_index = 0;
+      choice_ntk.foreach_choice( repr, [&]( auto const& g ) {
+        /* ensure that the node is not visited or temporarily marked */
+        assert( ntk.visited( g ) != ntk.trav_id() );
+        assert( ntk.visited( g ) != ntk.trav_id() - 1 );
+
+        /* mark node temporarily */
+        ntk.set_visited( g, ntk.trav_id() - 1 );
+
+        max_index = std::max( max_index, ntk.node_to_index( g ) );
+        return true;
+      } );
+
+      uint32_t cindex = node_match[max_index];
+      auto& pair = ( cindex & 1 ) ? half_adders[cindex >> 1] : full_adders[cindex >> 1];
+      cut_t const& cut = cuts.cuts( pair.first >> 16 )[pair.first & UINT16_MAX];
+
+      for ( auto l : cut )
+      {
+        topo_sort_rec( choice_ntk, ntk.index_to_node( l ) );
+      }
+
+      choice_ntk.foreach_choice( repr, [&]( auto const& g ) {
+        /* ensure that the node is not visited */
+        assert( ntk.visited( g ) != ntk.trav_id() );
+
+        /* mark node n permanently */
+        ntk.set_visited( g, ntk.trav_id() );
+
+        /* visit node */
+        topo_order.push_back( g );
+
+        return true;
+      } );
+    }
+    else
+    {
+      /* ensure that the node is not visited or temporarily marked */
+      assert( ntk.visited( n ) != ntk.trav_id() );
+      assert( ntk.visited( n ) != ntk.trav_id() - 1 );
+
+      /* mark node temporarily */
+      ntk.set_visited( n, ntk.trav_id() - 1 );
+
+      /* mark cut leaves */
+      ntk.foreach_fanin( n, [&]( auto const& f ) {
+        topo_sort_rec( choice_ntk, ntk.get_node( f ) );
+      } );
+
+      /* ensure that the node is not visited */
+      assert( ntk.visited( n ) != ntk.trav_id() );
+
+      /* mark node n permanently */
+      ntk.set_visited( n, ntk.trav_id() );
+
+      /* visit node */
+      topo_order.push_back( n );
+    }
+  }
+
+  std::pair<block_network, block_map> initialize_map_network()
+  {
+    block_network dest;
+    block_map old2new( ntk );
+
+    old2new[ntk.get_node( ntk.get_constant( false ) )] = dest.get_constant( false );
+    if ( ntk.get_node( ntk.get_constant( true ) ) != ntk.get_node( ntk.get_constant( false ) ) )
+      old2new[ntk.get_node( ntk.get_constant( true ) )] = dest.get_constant( true );
+
+    ntk.foreach_ci( [&]( auto const& n ) {
+      old2new[n] = dest.create_pi();
+    } );
+    return { dest, old2new };
+  }
+
+  void finalize( block_network& res, block_map& old2new )
+  {
+    for ( auto const& n : topo_order )
+    {
+      if ( ntk.is_pi( n ) || ntk.is_constant( n ) )
+        continue;
+
+      /* UINT32_MAX: node is not part of a selected adder, copy it as a single-output gate */
+      if ( node_match[ntk.node_to_index( n )] == UINT32_MAX )
+      {
+        finalize_simple_gate( res, old2new, n );
+      }
+      else if ( node_match[ntk.node_to_index( n )] < UINT32_MAX - 1 ) /* adder root; nodes tagged UINT32_MAX - 1 get their signal when the root is built */
+      {
+        finalize_multi_gate( res, old2new, n );
+      }
+    }
+
+    /* create POs */
+    ntk.foreach_co( [&]( auto const& f ) {
+      res.create_po( ntk.is_complemented( f ) ? !old2new[f] : old2new[f] );
+    } );
+  }
+
+  inline void finalize_simple_gate( block_network& res, block_map& old2new, node<Ntk> const& n )
+  {
+    kitty::dynamic_truth_table tt = ntk.node_function( n );
+
+    std::vector<signal<block_network>> children;
+    ntk.foreach_fanin( n, [&]( auto const& f, auto i ) {
+      auto s = old2new[f] ^ ntk.is_complemented( f );
+      children.push_back( s );
+    } );
+
+    old2new[n] = res.create_node( children, tt );
+  }
+
+  inline void finalize_multi_gate( block_network& res, block_map& old2new, node<Ntk> const& n )
+  {
+    uint32_t index = node_match[ntk.node_to_index( n )];
+    assert( index < UINT32_MAX - 1 );
+
+    /* extract the match */
+    if ( index & 1 )
+      finalize_multi_gate_ha( res, old2new, n, index >> 1 );
+    else
+      finalize_multi_gate_fa( res, old2new, n, index >> 1 );
+  }
+
+  inline void finalize_multi_gate_ha( block_network& res, block_map& old2new, node<Ntk> const& n, uint32_t index )
+  {
+    auto& pair = half_adders[index];
+    uint32_t index1 = pair.first >> 16;
+    uint32_t index2 = pair.second >> 16;
+    uint32_t cut_index1 = pair.first & UINT16_MAX;
+    uint32_t cut_index2 = pair.second & UINT16_MAX;
+    cut_t const& cut1 = cuts.cuts( index1 )[cut_index1];
+    cut_t const& cut2 = cuts.cuts( index2 )[cut_index2];
+
+    kitty::static_truth_table<3> tt1 = cuts.truth_table( cut1 );
+    kitty::static_truth_table<3> tt2 = cuts.truth_table( cut2 );
+    bool xor_is_1 = false;
+
+    /* find the XOR2 */
+    xor_is_1 = cut1->data.is_xor;
+
+    /* find the negation vector of AND2 and XOR2*/
+    kitty::static_truth_table<3> tt = xor_is_1 ? tt2 : tt1;
+    uint32_t neg_and = 0;
+    for ( uint32_t func : and2func )
+    {
+      if ( tt._bits == func )
+        break;
+      ++neg_and;
+    }
+
+    tt = xor_is_1 ? tt1 : tt2;
+    uint32_t neg_xor = 0;
+    for ( uint32_t func : xor2func )
+    {
+      if ( tt._bits == func )
+        break;
+      ++neg_xor;
+    }
+    neg_xor ^= neg_and;
+    neg_xor = ( neg_xor & 1 ) ^ ( ( neg_xor >> 1 ) & 1 ) ^ ( ( neg_xor >> 2 ) & 1 );
+
+    /* normalize and create multioutput gate */
+    std::array<signal<block_network>, 2> children;
+    uint32_t ctr = 0;
+    for ( auto l : cut1 )
+    {
+      signal<block_network> f = old2new[ntk.index_to_node( l )];
+      bool phase = ( ( neg_and >> ctr ) & 1 ) ? true : false;
+      children[ctr] = f ^ phase;
+      ++ctr;
+    }
+
+    if ( ps.map_inverted )
+    {
+      signal<block_network> ha = res.create_hai( children[0], children[1] );
+      old2new[ntk.index_to_node( xor_is_1 ? index2 : index1 )] = ha ^ ( ( neg_and >> 2 ) ? false : true );
+      old2new[ntk.index_to_node( xor_is_1 ? index1 : index2 )] = res.next_output_pin( ha ) ^ ( neg_xor ? false : true );
+      return;
+    }
+
+    signal<block_network> ha = res.create_ha( children[0], children[1] );
+    old2new[ntk.index_to_node( xor_is_1 ? index2 : index1 )] = ha ^ ( ( neg_and >> 2 ) ? true : false );
+    old2new[ntk.index_to_node( xor_is_1 ? index1 : index2 )] = res.next_output_pin( ha ) ^ ( neg_xor ? true : false );
+  }
+
+  inline void finalize_multi_gate_fa( block_network& res, block_map& old2new, node<Ntk> const& n, uint32_t index )
+  {
+    auto& pair = full_adders[index];
+    uint32_t index1 = pair.first >> 16;
+    uint32_t index2 = pair.second >> 16;
+    uint32_t cut_index1 = pair.first & UINT16_MAX;
+    uint32_t cut_index2 = pair.second & UINT16_MAX;
+    cut_t const& cut1 = cuts.cuts( index1 )[cut_index1];
+    cut_t const& cut2 = cuts.cuts( index2 )[cut_index2];
+
+    kitty::static_truth_table<3> tt1 = cuts.truth_table( cut1 );
+    kitty::static_truth_table<3> tt2 = cuts.truth_table( cut2 );
+
+    bool xor_is_1 = false;
+
+    /* find the XOR3 */
+    xor_is_1 = cut1->data.is_xor;
+
+    /* find the phase and permutation of MAJ3 and XOR3*/
+    kitty::static_truth_table<3> tt = xor_is_1 ? tt2 : tt1;
+    uint32_t neg_maj = 0;
+    for ( uint32_t func : maj3func )
+    {
+      if ( tt._bits == func )
+        break;
+      ++neg_maj;
+    }
+
+    if ( ps.map_inverted )
+    {
+      neg_maj = ( ~neg_maj ) & 0x7;
+    }
+
+    tt = xor_is_1 ? tt1 : tt2;
+    uint32_t neg_xor = 0;
+    for ( uint32_t func : xor3func )
+    {
+      if ( tt._bits == func )
+        break;
+      ++neg_xor;
+    }
+    neg_xor ^= neg_maj;
+    neg_xor = ( neg_xor & 1 ) ^ ( ( neg_xor >> 1 ) & 1 ) ^ ( ( neg_xor >> 2 ) & 1 );
+
+    /* normalize and create the multioutput gate */
+    std::array<signal<block_network>, 3> children;
+    uint32_t ctr = 0;
+    for ( auto l : cut1 )
+    {
+      signal<block_network> f = old2new[ntk.index_to_node( l )];
+      bool phase = ( ( neg_maj >> ctr ) & 1 ) ? true : false;
+      children[ctr] = f ^ phase;
+      ++ctr;
+    }
+
+    if ( ps.map_inverted )
+    {
+      signal<block_network> fa = res.create_fai( children[0], children[1], children[2] );
+      old2new[ntk.index_to_node( xor_is_1 ? index2 : index1 )] = fa;
+      old2new[ntk.index_to_node( xor_is_1 ? index1 : index2 )] = res.next_output_pin( fa ) ^ ( neg_xor ? false : true );
+      return;
+    }
+
+    signal<block_network> fa = res.create_fa( children[0], children[1], children[2] );
+    old2new[ntk.index_to_node( xor_is_1 ? index2 : index1 )] = fa;
+    old2new[ntk.index_to_node( xor_is_1 ? index1 : index2 )] = res.next_output_pin( fa ) ^ ( neg_xor ? true : false );
+  }
+
+private:
+  Ntk& ntk;
+  extract_adders_params const& ps;
+  extract_adders_stats& st;
+
+  network_cuts_t cuts;
+  leaves_hash_t cuts_classes;
+  matches_t half_adders;
+  matches_t full_adders;
+  std::vector<uint32_t> selected;
+  std::vector<uint32_t> node_match;
+
+  std::vector<node<Ntk>> topo_order;
+  std::vector<node<Ntk>> tmp_visited;
+
+  const std::array<uint64_t, 8> and2func = { 0x88, 0x44, 0x22, 0x11, 0x77, 0xbb, 0xdd, 0xee }; /* AND2 variants; index bits 0-1: input negations, bit 2: output negation */
+  const std::array<uint64_t, 8> maj3func = { 0xe8, 0xd4, 0xb2, 0x71, 0x8e, 0x4d, 0x2b, 0x17 }; /* MAJ3 variants; index bit k: negation of input k (entry 5 = MAJ(!a, b, !c) = 0x4d) */
+  const std::array<uint64_t, 2> xor2func = { 0x66, 0x99 }; /* XOR2, XNOR2 */
+  const std::array<uint64_t, 2> xor3func = { 0x96, 0x69 }; /* XOR3, XNOR3 */
+};
+
+} /* namespace detail */
+
+/*! \brief Adders extraction.
+ *
+ * This function extracts half and full adders from a network.
+ * It returns a `block_network` with extracted half and full adder
+ * blocks.
+ *
+ * **Required network functions:**
+ * - `size`
+ * - `is_pi`
+ * - `is_constant`
+ * - `node_to_index`
+ * - `index_to_node`
+ * - `get_node`
+ * - `foreach_co`
+ * - `foreach_node`
+ * - `foreach_gate`
+ *
+ * \param ntk Network
+ * \param ps Parameters
+ * \param pst Stats
+ *
+ */
+template<class Ntk>
+block_network extract_adders( Ntk& ntk, extract_adders_params const& ps = {}, extract_adders_stats* pst = {} )
+{
+  static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+  static_assert( has_size_v<Ntk>, "Ntk does not implement the size method" );
+  static_assert( has_is_pi_v<Ntk>, "Ntk does not implement the is_pi method" );
+  static_assert( has_is_constant_v<Ntk>, "Ntk does not implement the is_constant method" );
+  static_assert( has_node_to_index_v<Ntk>, "Ntk does not implement the node_to_index method" );
+  static_assert( has_index_to_node_v<Ntk>, "Ntk does not implement the index_to_node method" );
+  static_assert( has_get_node_v<Ntk>, "Ntk does not implement the get_node method" );
+  static_assert( has_foreach_node_v<Ntk>, "Ntk does not implement the foreach_node method" );
+  static_assert( has_foreach_gate_v<Ntk>, "Ntk does not implement the foreach_gate method" );
+  static_assert( has_foreach_co_v<Ntk>, "Ntk does not implement the foreach_co method" );
+
+  extract_adders_stats st; /* local stats, so the algorithm also runs when pst is null */
+
+  detail::extract_adders_impl p( ntk, ps, st );
+  block_network res = p.run();
+
+  if ( ps.verbose )
+    st.report();
+
+  if ( pst ) /* copy the statistics out only if the caller asked for them */
+    *pst = st;
+
+  return res;
+}
+
+} /* namespace mockturtle */
\ No newline at end of file
diff --git a/include/mockturtle/algorithms/mapper.hpp b/include/mockturtle/algorithms/mapper.hpp
index 500786752..3ec273084 100644
--- a/include/mockturtle/algorithms/mapper.hpp
+++ b/include/mockturtle/algorithms/mapper.hpp
@@ -37,20 +37,29 @@
 
 #include <fmt/format.h>
 
+#include "../networks/aig.hpp"
 #include "../networks/klut.hpp"
+#include "../networks/mig.hpp"
 #include "../networks/sequential.hpp"
+#include "../networks/xag.hpp"
 #include "../utils/node_map.hpp"
 #include "../utils/stopwatch.hpp"
 #include "../utils/tech_library.hpp"
 #include "../views/binding_view.hpp"
+#include "../views/color_view.hpp"
 #include "../views/depth_view.hpp"
 #include "../views/topo_view.hpp"
+#include "../views/window_view.hpp"
 #include "cleanup.hpp"
 #include "cut_enumeration.hpp"
 #include "cut_enumeration/exact_map_cut.hpp"
 #include "cut_enumeration/tech_map_cut.hpp"
 #include "detail/mffc_utils.hpp"
 #include "detail/switching_activity.hpp"
+#include "reconv_cut.hpp"
+#include "resyn_engines/mig_resyn.hpp"
+#include "resyn_engines/xag_resyn.hpp"
+#include "simulation.hpp"
 
 namespace mockturtle
 {
@@ -99,6 +108,12 @@ struct map_params
   /*! \brief Maximum number of cuts evaluated for logic sharing. */
   uint32_t logic_sharing_cut_limit{ 8u };
 
+  /*! \brief Use satisfiability don't cares for optimization. */
+  bool use_dont_cares{ false };
+
+  /*! \brief Window size for don't cares calculation. */
+  uint32_t window_size{ 12u };
+
   /*! \brief Be verbose. */
   bool verbose{ false };
 };
@@ -247,7 +262,7 @@ class tech_map_impl
     /* execute mapping */
     if ( !execute_mapping() )
       return res;
-    
+
     /* insert buffers for POs driven by PIs */
     insert_buffers();
 
@@ -278,7 +293,7 @@ class tech_map_impl
     /* execute mapping */
     if ( !execute_mapping() )
       return res;
-    
+
     /* insert buffers for POs driven by PIs */
     insert_buffers();
 
@@ -383,7 +398,7 @@ class tech_map_impl
           continue;
         }
         const auto tt = cuts.truth_table( *cut );
-        const auto fe = kitty::shrink_to<NInputs>( tt );
+        const auto fe = kitty::extend_to<6>( tt );
         auto fe_canon = fe;
 
         uint8_t negations_pos = 0;
@@ -1139,7 +1154,7 @@ class tech_map_impl
   {
     auto& node_data = node_match[index];
 
-    kitty::static_truth_table<NInputs> zero_tt;
+    kitty::static_truth_table<6> zero_tt;
     auto const supergates_zero = library.get_supergates( zero_tt );
     auto const supergates_one = library.get_supergates( ~zero_tt );
 
@@ -1869,6 +1884,7 @@ template<class NtkDest, unsigned CutSize, typename CutData, class Ntk, class Rew
 class exact_map_impl
 {
 public:
+  static constexpr uint32_t max_window_size = 12;
   using network_cuts_t = fast_network_cuts<Ntk, CutSize, true, CutData>;
   using cut_t = typename network_cuts_t::cut_t;
 
@@ -1899,7 +1915,14 @@ class exact_map_impl
     } );
 
     /* match cuts with gates */
-    compute_matches();
+    if ( ps.use_dont_cares )
+    {
+      compute_matches_dc();
+    }
+    else
+    {
+      compute_matches();
+    }
 
     /* init the data structure */
     init_nodes();
@@ -2005,7 +2028,7 @@ class exact_map_impl
 
         /* match the cut using canonization and get the gates */
         const auto tt = cuts.truth_table( *cut );
-        const auto fe = kitty::shrink_to<NInputs>( tt );
+        const auto fe = kitty::extend_to<NInputs>( tt );
         const auto config = kitty::exact_npn_canonization( fe );
         auto const supergates_npn = library.get_supergates( std::get<0>( config ) );
         auto const supergates_npn_neg = library.get_supergates( ~std::get<0>( config ) );
@@ -2041,6 +2064,134 @@ class exact_map_impl
     } );
   }
 
+  /*! \brief Computes the library matches of all cuts using window don't cares.
+   *
+   * Each gate gets a simulated reconvergence-driven window; cuts whose leaves all carry the
+   * current window color use the reachable leaf patterns as care set, others are fully specified.
+   */
+  void compute_matches_dc()
+  {
+    reconvergence_driven_cut_parameters rps;
+    /* the window is simulated on max_window_size variables: never collect more leaves */
+    rps.max_leaves = ps.window_size < max_window_size ? ps.window_size : max_window_size;
+    reconvergence_driven_cut_statistics rst;
+    detail::reconvergence_driven_cut_impl<Ntk, false, false> reconv_cuts( ntk, rps, rst );
+
+    color_view<Ntk> color_ntk{ ntk };
+
+    /* match gates */
+    ntk.foreach_gate( [&]( auto const& n ) {
+      const auto index = ntk.node_to_index( n );
+      std::vector<cut_match_t<NtkDest, NInputs>> node_matches;
+
+      std::vector<node<Ntk>> roots = { n };
+      auto const extended_leaves = reconv_cuts.run( roots ).first;
+
+      std::vector<node<Ntk>> gates{ collect_nodes( color_ntk, extended_leaves, roots ) };
+      window_view window_ntk{ color_ntk, extended_leaves, roots, gates };
+
+      default_simulator<kitty::static_truth_table<max_window_size>> sim;
+      const auto tts = simulate_nodes<kitty::static_truth_table<max_window_size>>( window_ntk, sim );
+
+      auto match_index = 0u;
+      for ( auto& cut : cuts.cuts( index ) )
+      {
+        /* ignore unit cut */
+        if ( cut->size() == 1 && *cut->begin() == index )
+        {
+          ( *cut )->data.ignore = true;
+          continue;
+        }
+
+        if ( cut->size() > NInputs )
+        {
+          /* Ignore cuts too big to be mapped using the library */
+          ( *cut )->data.ignore = true;
+          continue;
+        }
+
+        /* match the cut using canonization and get the gates */
+        const auto tt = cuts.truth_table( *cut );
+        const auto fe = kitty::extend_to<NInputs>( tt );
+
+        auto [tt_npn, neg, perm] = kitty::exact_npn_canonization( fe );
+        auto perm_neg = perm;
+        auto neg_neg = neg;
+
+        /* dont cares computation */
+        kitty::static_truth_table<NInputs> care;
+
+        bool containment = true;
+        for ( auto const& l : *cut )
+        {
+          if ( color_ntk.color( ntk.index_to_node( l ) ) != color_ntk.current_color() )
+          {
+            containment = false;
+            break;
+          }
+        }
+
+        if ( containment )
+        {
+          /* compute care set */
+          for ( auto pattern = 0u; pattern < ( 1u << window_ntk.num_pis() ); ++pattern )
+          {
+            uint32_t entry{ 0u };
+            auto j = 0u;
+            for ( auto const& l : *cut )
+            {
+              entry |= kitty::get_bit( tts[l], pattern ) << j;
+              ++j;
+            }
+            kitty::set_bit( care, entry );
+          }
+        }
+        else
+        {
+          /* completely specified */
+          care = ~care;
+        }
+
+        auto const dc_npn = apply_npn_transformation( ~care, neg & ~( 1 << NInputs ), perm );
+        const std::vector<exact_supergate<NtkDest, NInputs>>* supergates_npn = library.get_supergates( tt_npn, dc_npn, neg, perm );
+        const std::vector<exact_supergate<NtkDest, NInputs>>* supergates_npn_neg = library.get_supergates( ~tt_npn, dc_npn, neg_neg, perm_neg );
+
+        if ( supergates_npn != nullptr || supergates_npn_neg != nullptr )
+        {
+          cut_match_t<NtkDest, NInputs> match;
+
+          if ( supergates_npn == nullptr )
+          {
+            perm = perm_neg;
+            neg = neg_neg;
+          }
+
+          uint8_t phase = ( neg >> NInputs ) & 1;
+
+          match.supergates[phase] = supergates_npn;
+          match.supergates[phase ^ 1] = supergates_npn_neg;
+
+          /* store permutations and negations */
+          match.negation = 0;
+          for ( auto j = 0u; j < perm.size() && j < NInputs; ++j )
+          {
+            match.permutation[perm[j]] = j;
+            match.negation |= ( ( neg >> perm[j] ) & 1 ) << j;
+          }
+          node_matches.push_back( match );
+          ( *cut )->data.match_index = match_index++;
+        }
+        else
+        {
+          /* Ignore not matched cuts */
+          ( *cut )->data.ignore = true;
+        }
+      }
+
+      matches[index] = node_matches;
+    } );
+  }
+
   template<bool DO_AREA>
   bool compute_mapping()
   {
diff --git a/include/mockturtle/algorithms/rewrite.hpp b/include/mockturtle/algorithms/rewrite.hpp
index 123344b47..e515dae93 100644
--- a/include/mockturtle/algorithms/rewrite.hpp
+++ b/include/mockturtle/algorithms/rewrite.hpp
@@ -36,11 +36,15 @@
 #include "../utils/cost_functions.hpp"
 #include "../utils/node_map.hpp"
 #include "../utils/stopwatch.hpp"
+#include "../views/color_view.hpp"
 #include "../views/depth_view.hpp"
 #include "../views/fanout_view.hpp"
+#include "../views/window_view.hpp"
 #include "cleanup.hpp"
 #include "cut_enumeration.hpp"
 #include "cut_enumeration/rewrite_cut.hpp"
+#include "reconv_cut.hpp"
+#include "simulation.hpp"
 
 #include <fmt/format.h>
 #include <kitty/dynamic_truth_table.hpp>
@@ -77,6 +81,12 @@ struct rewrite_params
   /*! \brief Allow zero-gain substitutions */
   bool allow_zero_gain{ false };
 
+  /*! \brief Use satisfiability don't cares for optimization. */
+  bool use_dont_cares{ false };
+
+  /*! \brief Window size for don't cares calculation. */
+  uint32_t window_size{ 8u };
+
   /*! \brief Be verbose. */
   bool verbose{ false };
 };
@@ -110,6 +120,7 @@ template<class Ntk, class Library, class NodeCostFn>
 class rewrite_impl
 {
   static constexpr uint32_t num_vars = 4u;
+  static constexpr uint32_t max_window_size = 8u;
   using network_cuts_t = dynamic_network_cuts<Ntk, num_vars, true, cut_enumeration_rewrite_cut>;
   using cut_manager_t = detail::dynamic_cut_enumeration_impl<Ntk, num_vars, true, cut_enumeration_rewrite_cut>;
   using cut_t = typename network_cuts_t::cut_t;
@@ -143,6 +154,18 @@ class rewrite_impl
       compute_required();
     }
 
+    if ( ps.use_dont_cares )
+      perform_rewriting_dc();
+    else
+      perform_rewriting();
+
+    st.estimated_gain = _estimated_gain;
+    st.candidates = _candidates;
+  }
+
+private:
+  void perform_rewriting()
+  {
     /* initialize the cut manager */
     cut_enumeration_stats cst;
     network_cuts_t cuts( ntk.size() + ( ntk.size() >> 1 ) );
@@ -308,12 +331,234 @@ class rewrite_impl
         clear_cuts_fanout_rec( cuts, cut_manager, ntk.get_node( new_f ) );
       }
     } );
+  }
 
-    st.estimated_gain = _estimated_gain;
-    st.candidates = _candidates;
+  /*! \brief Rewriting pass that uses satisfiability don't cares.
+   *
+   * Cuts are matched under the care set obtained by simulating a window around each gate.
+   */
+  void perform_rewriting_dc()
+  {
+    /* initialize the cut manager */
+    cut_enumeration_stats cst;
+    network_cuts_t cuts( ntk.size() + ( ntk.size() >> 1 ) );
+    cut_manager_t cut_manager( ntk, ps.cut_enumeration_ps, cst, cuts );
+
+    /* initialize cuts for constant nodes and PIs */
+    cut_manager.init_cuts();
+
+    auto& db = library.get_database();
+
+    std::array<signal<Ntk>, num_vars> leaves;
+    std::array<signal<Ntk>, num_vars> best_leaves;
+    std::array<uint8_t, num_vars> permutation;
+    signal<Ntk> best_signal;
+
+    reconvergence_driven_cut_parameters rps;
+    /* the window is simulated on max_window_size variables: never collect more leaves */
+    rps.max_leaves = std::min( ps.window_size, max_window_size );
+    reconvergence_driven_cut_statistics rst;
+    detail::reconvergence_driven_cut_impl<Ntk, false, has_level_v<Ntk>> reconv_cuts( ntk, rps, rst );
+    unordered_node_map<kitty::static_truth_table<max_window_size>, Ntk> tts( ntk );
+
+    color_view<Ntk> color_ntk{ ntk };
+
+    const auto size = ntk.size();
+    ntk.foreach_gate( [&]( auto const& n, auto i ) {
+      if ( ntk.fanout_size( n ) == 0u )
+        return;
+
+      int32_t best_gain = -1;
+      uint32_t best_level = UINT32_MAX;
+      bool best_phase = false;
+
+      /* update level for node */
+      if constexpr ( has_level_v<Ntk> )
+      {
+        if ( ps.preserve_depth )
+        {
+          uint32_t level = 0;
+          ntk.foreach_fanin( n, [&]( auto const& f ) {
+            level = std::max( level, ntk.level( ntk.get_node( f ) ) );
+          } );
+          ntk.set_level( n, level + 1 );
+          best_level = level + 1;
+        }
+      }
+
+      cut_manager.clear_cuts( n );
+      cut_manager.compute_cuts( n );
+
+      /* compute window */
+      std::vector<node<Ntk>> roots = { n };
+      auto const extended_leaves = reconv_cuts.run( roots ).first;
+      std::vector<node<Ntk>> gates{ collect_nodes( color_ntk, extended_leaves, roots ) };
+      window_view window_ntk{ color_ntk, extended_leaves, roots, gates };
+
+      default_simulator<kitty::static_truth_table<max_window_size>> sim;
+      tts.reset();
+      simulate_nodes_with_node_map<kitty::static_truth_table<max_window_size>>( window_ntk, tts, sim );
+
+      uint32_t cut_index = 0;
+      for ( auto& cut : cuts.cuts( ntk.node_to_index( n ) ) )
+      {
+        /* skip trivial cut */
+        if ( ( cut->size() == 1 && *cut->begin() == ntk.node_to_index( n ) ) )
+        {
+          ++cut_index;
+          continue;
+        }
+
+        /* Boolean matching */
+        auto config = kitty::exact_npn_canonization( cuts.truth_table( *cut ) );
+        auto tt_npn = std::get<0>( config );
+        auto neg = std::get<1>( config );
+        auto perm = std::get<2>( config );
+
+        kitty::static_truth_table<num_vars> care;
+
+        bool containment = true;
+        for ( auto const& l : *cut )
+        {
+          if ( color_ntk.color( ntk.index_to_node( l ) ) != color_ntk.current_color() )
+          {
+            containment = false;
+            break;
+          }
+        }
+
+        if ( containment )
+        {
+          /* compute care set */
+          for ( auto pattern = 0u; pattern < ( 1u << window_ntk.num_pis() ); ++pattern )
+          {
+            uint32_t entry{ 0u };
+            auto j = 0u;
+            for ( auto const& l : *cut )
+            {
+              entry |= kitty::get_bit( tts[l], pattern ) << j;
+              ++j;
+            }
+            kitty::set_bit( care, entry );
+          }
+        }
+        else
+        {
+          /* completely specified */
+          care = ~care;
+        }
+
+        auto const dc_npn = apply_npn_transformation( ~care, neg & ~( 1 << num_vars ), perm );
+        auto const structures = library.get_supergates( tt_npn, dc_npn, neg, perm );
+
+        if ( structures == nullptr )
+        {
+          ++cut_index;
+          continue;
+        }
+
+        uint32_t negation = 0;
+        for ( auto j = 0u; j < num_vars; ++j )
+        {
+          permutation[perm[j]] = j;
+          negation |= ( ( neg >> perm[j] ) & 1 ) << j;
+        }
+
+        /* save output negation to apply */
+        const bool phase = ( neg >> num_vars ) == 1;
+
+        {
+          auto j = 0u;
+          for ( auto const leaf : *cut )
+          {
+            leaves[permutation[j++]] = ntk.make_signal( ntk.index_to_node( leaf ) );
+          }
+
+          while ( j < num_vars )
+            leaves[permutation[j++]] = ntk.get_constant( false );
+        }
+
+        for ( auto j = 0u; j < num_vars; ++j )
+        {
+          if ( ( negation >> j ) & 1 )
+          {
+            leaves[j] = !leaves[j];
+          }
+        }
+
+        {
+          /* measure the MFFC contained in the cut */
+          int32_t mffc_size = measure_mffc_deref( n, cut );
+
+          for ( auto const& dag : *structures )
+          {
+            auto [nodes_added, level] = evaluate_entry( n, db.get_node( dag.root ), leaves );
+            int32_t gain = mffc_size - nodes_added;
+
+            /* discard if dag.root and n are the same */
+            if ( ntk.node_to_index( n ) == db.value( db.get_node( dag.root ) ) >> 1 )
+              continue;
+
+            /* discard if no gain */
+            if ( gain < 0 || ( !ps.allow_zero_gain && gain == 0 ) )
+              continue;
+
+            /* discard if level increases */
+            if constexpr ( has_level_v<Ntk> )
+            {
+              if ( ps.preserve_depth && level > required[n] )
+                continue;
+            }
+
+            if ( ( gain > best_gain ) || ( gain == best_gain && level < best_level ) )
+            {
+              ++_candidates;
+              best_gain = gain;
+              best_signal = dag.root;
+              best_leaves = leaves;
+              best_phase = phase;
+              best_level = level;
+            }
+
+            if ( !ps.allow_multiple_structures )
+              break;
+          }
+
+          /* restore contained MFFC */
+          measure_mffc_ref( n, cut );
+          ++cut_index;
+
+          if ( cut->size() == 0 || ( cut->size() == 1 && *cut->begin() != ntk.node_to_index( n ) ) )
+            break;
+        }
+      }
+
+      if ( best_gain > 0 || ( ps.allow_zero_gain && best_gain == 0 ) )
+      {
+        /* replace node with the new structure */
+        topo_view topo{ db, best_signal };
+        auto new_f = cleanup_dangling( topo, ntk, best_leaves.begin(), best_leaves.end() ).front();
+
+        assert( n != ntk.get_node( new_f ) );
+
+        _estimated_gain += best_gain;
+        ntk.substitute_node_no_restrash( n, new_f ^ best_phase );
+
+        if constexpr ( has_level_v<Ntk> )
+        {
+          /* propagate new required to leaves */
+          if ( ps.preserve_depth )
+          {
+            propagate_required_rec( ntk.node_to_index( n ), ntk.get_node( new_f ), size, required[n] );
+            assert( ntk.level( ntk.get_node( new_f ) ) <= required[n] );
+          }
+        }
+
+        clear_cuts_fanout_rec( cuts, cut_manager, ntk.get_node( new_f ) );
+      }
+    } );
   }
 
-private:
   int32_t measure_mffc_ref( node<Ntk> const& n, cut_t const* cut )
   {
     /* reference cut leaves */
@@ -673,7 +918,7 @@ void rewrite( Ntk& ntk, Library&& library, rewrite_params const& ps = {}, rewrit
 
   rewrite_stats st;
 
-  if ( ps.preserve_depth )
+  if ( ps.preserve_depth || ps.use_dont_cares )
   {
     using depth_view_t = depth_view<Ntk, NodeCostFn>;
     depth_view_t depth_ntk{ ntk };
diff --git a/include/mockturtle/networks/block.hpp b/include/mockturtle/networks/block.hpp
new file mode 100644
index 000000000..a31ec7c09
--- /dev/null
+++ b/include/mockturtle/networks/block.hpp
@@ -0,0 +1,976 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2023  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file block.hpp
+  \brief Block logic network implementation with multi-output support
+
+  \author Alessandro Tempia Calvino
+*/
+
+#pragma once
+
+#include "../traits.hpp"
+#include "../utils/algorithm.hpp"
+#include "../utils/truth_table_cache.hpp"
+#include "detail/foreach.hpp"
+#include "events.hpp"
+#include "storage.hpp"
+
+#include <kitty/constructors.hpp>
+#include <kitty/dynamic_truth_table.hpp>
+
+#include <algorithm>
+#include <memory>
+
+namespace mockturtle
+{
+
+struct block_storage_data
+{
+  truth_table_cache<kitty::dynamic_truth_table> cache;
+};
+
+/*! \brief Block node (single- or multi-output)
+ *
+ * `data[0].h1`  : Application-specific value
+ * `data[1].h1`  : Visited flags
+ * `data[1].h2`  : Total fan-out size (we use MSB to indicate whether a node is dead)
+ * `data[2+i].h1`: Function literal in truth table cache of output pin `i`
+ * `data[2+i].h2`: Fan-out size of output pin `i`
+ *
+ */
+struct block_storage_node : block_fanin_node<2>
+{
+  block_storage_node()
+  {
+    data = decltype( data )( 3 ); /* two bookkeeping entries plus one output pin */
+  }
+
+  bool operator==( block_storage_node const& other ) const
+  {
+    if ( data.size() != other.data.size() || children != other.children )
+      return false;
+    /* compare the function literal of every output pin, stored from data[2] on */
+    for ( auto i = 2u; i < data.size(); ++i )
+      if ( data[i].h1 != other.data[i].h1 )
+        return false;
+
+    return true;
+  }
+};
+
+/*! \brief Block network storage container
+
+  ...
+*/
+using block_storage = storage_no_hash<block_storage_node, block_storage_data>;
+
+class block_network
+{
+public:
+#pragma region Types and constructors
+  static constexpr auto min_fanin_size = 1;
+  static constexpr auto max_fanin_size = 32;
+  static constexpr auto min_fanout_size = 1;
+  static constexpr auto max_fanout_size = 2;
+  static constexpr auto fanout_signal_bits = 1;
+
+  using base_type = block_network;
+  using storage = std::shared_ptr<block_storage>;
+  using node = uint64_t;
+
+  struct signal
+  {
+    signal() = default;
+
+    signal( uint64_t index, uint64_t complement )
+        : complement( complement ), output( 0 ), index( index )
+    {
+    }
+
+    signal( uint32_t index )
+        : complement( 0 ), output( 0 ), index( index )
+    {
+    }
+
+    signal( uint64_t index, uint64_t complement, uint64_t output )
+        : complement( complement ), output( output ), index( index )
+    {
+    }
+
+    explicit signal( uint64_t data )
+        : data( data )
+    {
+    }
+
+    signal( block_storage::node_type::pointer_type const& p )
+        : complement( p.weight & 1 ), output( p.weight >> 1 ), index( p.index )
+    {
+    }
+
+    union
+    {
+      struct
+      {
+        uint64_t complement : 1;
+        uint64_t output : fanout_signal_bits;
+        uint64_t index : 63 - fanout_signal_bits;
+      };
+      uint64_t data;
+    };
+
+    signal operator!() const
+    {
+      return signal( data ^ 1 );
+    }
+
+    signal operator+() const
+    {
+      return { index, 0, output }; /* ( index, complement, output ): same pin, positive polarity */
+    }
+
+    signal operator-() const
+    {
+      return { index, 1, output }; /* ( index, complement, output ): same pin, negative polarity */
+    }
+
+    signal operator^( bool complement ) const
+    {
+      return signal( data ^ ( complement ? 1 : 0 ) );
+    }
+
+    bool operator==( signal const& other ) const
+    {
+      return data == other.data;
+    }
+
+    bool operator!=( signal const& other ) const
+    {
+      return data != other.data;
+    }
+
+    bool operator<( signal const& other ) const
+    {
+      return data < other.data;
+    }
+
+    operator block_storage::node_type::pointer_type() const
+    {
+      return { index, ( output << 1 ) | complement };
+    }
+
+    operator uint64_t() const
+    {
+      return data;
+    }
+
+#if __cplusplus > 201703L
+    bool operator==( block_storage::node_type::pointer_type const& other ) const
+    {
+      return data == other.data;
+    }
+#endif
+  };
+
+  block_network()
+      : _storage( std::make_shared<block_storage>() ),
+        _events( std::make_shared<decltype( _events )::element_type>() )
+  {
+    _init();
+  }
+
+  block_network( std::shared_ptr<block_storage> storage )
+      : _storage( storage ),
+        _events( std::make_shared<decltype( _events )::element_type>() )
+  {
+    _init();
+  }
+
+  block_network clone() const
+  {
+    return { std::make_shared<block_storage>( *_storage ) };
+  }
+
+protected:
+  inline void _init()
+  {
+    /* reserve the second node for constant 1 */
+    _storage->nodes.emplace_back();
+
+    /* reserve some truth tables for nodes */
+    kitty::dynamic_truth_table tt_zero( 0 );
+    _storage->data.cache.insert( tt_zero );
+
+    static uint64_t _not = 0x1;
+    kitty::dynamic_truth_table tt_not( 1 );
+    kitty::create_from_words( tt_not, &_not, &_not + 1 );
+    _storage->data.cache.insert( tt_not );
+
+    static uint64_t _and = 0x8;
+    kitty::dynamic_truth_table tt_and( 2 );
+    kitty::create_from_words( tt_and, &_and, &_and + 1 );
+    _storage->data.cache.insert( tt_and );
+
+    static uint64_t _or = 0xe;
+    kitty::dynamic_truth_table tt_or( 2 );
+    kitty::create_from_words( tt_or, &_or, &_or + 1 );
+    _storage->data.cache.insert( tt_or );
+
+    static uint64_t _lt = 0x4;
+    kitty::dynamic_truth_table tt_lt( 2 );
+    kitty::create_from_words( tt_lt, &_lt, &_lt + 1 );
+    _storage->data.cache.insert( tt_lt );
+
+    static uint64_t _le = 0xd;
+    kitty::dynamic_truth_table tt_le( 2 );
+    kitty::create_from_words( tt_le, &_le, &_le + 1 );
+    _storage->data.cache.insert( tt_le );
+
+    static uint64_t _xor = 0x6;
+    kitty::dynamic_truth_table tt_xor( 2 );
+    kitty::create_from_words( tt_xor, &_xor, &_xor + 1 );
+    _storage->data.cache.insert( tt_xor );
+
+    static uint64_t _maj = 0xe8;
+    kitty::dynamic_truth_table tt_maj( 3 );
+    kitty::create_from_words( tt_maj, &_maj, &_maj + 1 );
+    _storage->data.cache.insert( tt_maj );
+
+    static uint64_t _ite = 0xd8;
+    kitty::dynamic_truth_table tt_ite( 3 );
+    kitty::create_from_words( tt_ite, &_ite, &_ite + 1 );
+    _storage->data.cache.insert( tt_ite );
+
+    static uint64_t _xor3 = 0x96;
+    kitty::dynamic_truth_table tt_xor3( 3 );
+    kitty::create_from_words( tt_xor3, &_xor3, &_xor3 + 1 );
+    _storage->data.cache.insert( tt_xor3 );
+
+    /* truth tables for constants */
+    _storage->nodes[0].data[2].h1 = 0;
+    _storage->nodes[1].data[2].h1 = 1;
+  }
+#pragma endregion
+
+#pragma region Primary I / O and constants
+public:
+  signal get_constant( bool value = false ) const
+  {
+    return value ? signal( 1, 0 ) : signal( 0, 0 );
+  }
+
+  signal create_pi()
+  {
+    const auto index = _storage->nodes.size();
+    _storage->nodes.emplace_back();
+    _storage->inputs.emplace_back( index );
+    _storage->nodes[index].data[2].h1 = 2;
+    return { index, 0 };
+  }
+
+  uint32_t create_po( signal const& f )
+  {
+    /* increase ref-count to children */
+    _storage->nodes[f.index].data[1].h2++;
+    _storage->nodes[f.index].data[2 + f.output].h2++;
+    auto const po_index = static_cast<uint32_t>( _storage->outputs.size() );
+    _storage->outputs.emplace_back( f.index, ( f.output << 1 ) | f.complement );
+    return po_index;
+  }
+
+  bool is_combinational() const
+  {
+    return true;
+  }
+
+  bool is_multioutput( node const& n ) const
+  {
+    return _storage->nodes[n].data.size() > 3;
+  }
+
+  bool is_constant( node const& n ) const
+  {
+    return n <= 1;
+  }
+
+  bool is_ci( node const& n ) const
+  {
+    return n > 1 && _storage->nodes[n].children.size() == 0u;
+  }
+
+  bool is_pi( node const& n ) const
+  {
+    return n > 1 && _storage->nodes[n].children.size() == 0u;
+  }
+
+  bool constant_value( node const& n ) const
+  {
+    return n != 0;
+  }
+#pragma endregion
+
+#pragma region Create unary functions
+  signal create_buf( signal const& a )
+  {
+    return a;
+  }
+
+  signal create_not( signal const& a )
+  {
+    return _create_node( { a }, 3 );
+  }
+#pragma endregion
+
+#pragma region Create binary functions
+  signal create_and( signal a, signal b )
+  {
+    return _create_node( { a, b }, 4 );
+  }
+
+  signal create_nand( signal a, signal b )
+  {
+    return _create_node( { a, b }, 5 );
+  }
+
+  signal create_or( signal a, signal b )
+  {
+    return _create_node( { a, b }, 6 );
+  }
+
+  signal create_lt( signal a, signal b )
+  {
+    return _create_node( { a, b }, 8 );
+  }
+
+  signal create_le( signal a, signal b )
+  {
+    return _create_node( { a, b }, 11 );
+  }
+
+  signal create_xor( signal a, signal b )
+  {
+    return _create_node( { a, b }, 12 );
+  }
+#pragma endregion
+
+#pragma region Create ternary functions
+  signal create_maj( signal a, signal b, signal c )
+  {
+    return _create_node( { a, b, c }, 14 );
+  }
+
+  signal create_ite( signal a, signal b, signal c )
+  {
+    return _create_node( { a, b, c }, 16 );
+  }
+
+  signal create_xor3( signal a, signal b, signal c )
+  {
+    return _create_node( { a, b, c }, 18 );
+  }
+
+  signal create_ha( signal a, signal b )
+  {
+    return _create_node( { a, b }, { 4, 12 } );
+  }
+
+  signal create_hai( signal a, signal b )
+  {
+    return _create_node( { a, b }, { 5, 13 } );
+  }
+
+  signal create_fa( signal a, signal b, signal c )
+  {
+    return _create_node( { a, b, c }, { 14, 18 } );
+  }
+
+  signal create_fai( signal a, signal b, signal c )
+  {
+    return _create_node( { a, b, c }, { 15, 19 } );
+  }
+#pragma endregion
+
+#pragma region Create nary functions
+  signal create_nary_and( std::vector<signal> const& fs )
+  {
+    return tree_reduce( fs.begin(), fs.end(), get_constant( true ), [this]( auto const& a, auto const& b ) { return create_and( a, b ); } );
+  }
+
+  signal create_nary_or( std::vector<signal> const& fs )
+  {
+    return tree_reduce( fs.begin(), fs.end(), get_constant( false ), [this]( auto const& a, auto const& b ) { return create_or( a, b ); } );
+  }
+
+  signal create_nary_xor( std::vector<signal> const& fs )
+  {
+    return tree_reduce( fs.begin(), fs.end(), get_constant( false ), [this]( auto const& a, auto const& b ) { return create_xor( a, b ); } );
+  }
+#pragma endregion
+
+#pragma region Create arbitrary functions
+  signal _create_node( std::vector<signal> const& children, uint32_t literal )
+  {
+    storage::element_type::node_type node;
+    std::copy( children.begin(), children.end(), std::back_inserter( node.children ) );
+    node.data[2].h1 = literal; /* function literal of the single output pin */
+
+    const auto index = _storage->nodes.size();
+    _storage->nodes.push_back( node );
+
+    /* increase ref-count to children */
+    for ( auto c : children )
+    {
+      _storage->nodes[c.index].data[1].h2++; /* total fan-out of the child node */
+      _storage->nodes[c.index].data[2 + c.output].h2++; /* fan-out of the referenced output pin */
+    }
+
+    set_value( index, 0 );
+
+    for ( auto const& fn : _events->on_add )
+    {
+      ( *fn )( index );
+    }
+
+    return { index, 0 };
+  }
+
+  signal _create_node( std::vector<signal> const& children, std::vector<uint32_t> const& literals )
+  {
+    storage::element_type::node_type node;
+    std::copy( children.begin(), children.end(), std::back_inserter( node.children ) );
+
+    node.data = decltype( node.data )( 2 + literals.size() ); /* one entry per output pin after the two bookkeeping ones */
+
+    for ( auto i = 0u; i < literals.size(); ++i )
+      node.data[2 + i].h1 = literals[i]; /* function literal of output pin i */
+
+    const auto index = _storage->nodes.size();
+    _storage->nodes.push_back( node );
+
+    /* increase ref-count to children */
+    for ( auto c : children )
+    {
+      _storage->nodes[c.index].data[1].h2++; /* total fan-out of the child node */
+      _storage->nodes[c.index].data[2 + c.output].h2++; /* fan-out of the referenced output pin */
+    }
+
+    set_value( index, 0 );
+
+    for ( auto const& fn : _events->on_add )
+    {
+      ( *fn )( index );
+    }
+
+    return { index, 0 }; /* signal to output pin 0 of the new node */
+  }
+
+  signal create_node( std::vector<signal> const& children, kitty::dynamic_truth_table const& function )
+  {
+    if ( children.size() == 0u )
+    {
+      assert( function.num_vars() == 0u );
+      return get_constant( !kitty::is_const0( function ) );
+    }
+    return _create_node( children, _storage->data.cache.insert( function ) );
+  }
+
+  signal create_node( std::vector<signal> const& children, std::vector<kitty::dynamic_truth_table> const& functions )
+  {
+    assert( functions.size() > 0 );
+
+    if ( children.size() == 0u )
+    {
+      assert( functions[0].num_vars() == 0u );
+      return get_constant( !kitty::is_const0( functions[0] ) );
+    }
+    std::vector<uint32_t> literals;
+    for ( auto const& tt : functions )
+      literals.push_back( _storage->data.cache.insert( tt ) );
+
+    return _create_node( children, literals );
+  }
+
+  signal clone_node( block_network const& other, node const& source, std::vector<signal> const& children )
+  {
+    assert( !children.empty() );
+    if ( other.is_multioutput( source ) )
+    {
+      std::vector<kitty::dynamic_truth_table> tts; /* one function per output pin of `source` */
+      for ( auto i = 2u; i < other._storage->nodes[source].data.size(); ++i )
+        tts.push_back( other._storage->data.cache[other._storage->nodes[source].data[i].h1] );
+      return create_node( children, tts );
+    }
+    else
+    {
+      const auto tt = other._storage->data.cache[other._storage->nodes[source].data[2].h1];
+      return create_node( children, tt );
+    }
+  }
+#pragma endregion
+
+#pragma region Restructuring
+  // void substitute_node( node const& old_node, signal const& new_signal )
+  // {
+  //   /* find all parents from old_node */
+  //   for ( auto i = 0u; i < _storage->nodes.size(); ++i )
+  //   {
+  //     auto& n = _storage->nodes[i];
+  //     for ( auto& child : n.children )
+  //     {
+  //       if ( child == old_node )
+  //       {
+  //         std::vector<signal> old_children( n.children.size() );
+  //         std::transform( n.children.begin(), n.children.end(), old_children.begin(), []( auto c ) { return c.index; } );
+  //         child = new_signal;
+
+  //         // increment fan-out of new node
+  //         _storage->nodes[new_signal].data[0].h1++;
+
+  //         for ( auto const& fn : _events->on_modified )
+  //         {
+  //           ( *fn )( i, old_children );
+  //         }
+  //       }
+  //     }
+  //   }
+
+  //   /* check outputs */
+  //   for ( auto& output : _storage->outputs )
+  //   {
+  //     if ( output == old_node )
+  //     {
+  //       output = new_signal;
+
+  //       // increment fan-out of new node
+  //       _storage->nodes[new_signal].data[0].h1++;
+  //     }
+  //   }
+
+  //   // reset fan-out of old node
+  //   _storage->nodes[old_node].data[0].h1 = 0;
+  // }
+
+  inline bool is_dead( node const& n ) const
+  {
+    return false;
+  }
+#pragma endregion
+
+#pragma region Structural properties
+  auto size() const
+  {
+    return static_cast<uint32_t>( _storage->nodes.size() );
+  }
+
+  auto num_cis() const
+  {
+    return static_cast<uint32_t>( _storage->inputs.size() );
+  }
+
+  auto num_cos() const
+  {
+    return static_cast<uint32_t>( _storage->outputs.size() );
+  }
+
+  auto num_pis() const
+  {
+    return static_cast<uint32_t>( _storage->inputs.size() );
+  }
+
+  auto num_pos() const
+  {
+    return static_cast<uint32_t>( _storage->outputs.size() );
+  }
+
+  auto num_gates() const
+  {
+    return static_cast<uint32_t>( _storage->nodes.size() - _storage->inputs.size() - 2 );
+  }
+
+  uint32_t num_outputs( node const& n ) const
+  {
+    return static_cast<uint32_t>( _storage->nodes[n].data.size() - 2 );
+  }
+
+  uint32_t fanin_size( node const& n ) const
+  {
+    return static_cast<uint32_t>( _storage->nodes[n].children.size() );
+  }
+
+  uint32_t fanout_size( node const& n ) const
+  {
+    return _storage->nodes[n].data[1].h2;
+  }
+
+  uint32_t incr_fanout_size( node const& n ) const
+  {
+    return _storage->nodes[n].data[1].h2++;
+  }
+
+  uint32_t decr_fanout_size( node const& n ) const
+  {
+    return --_storage->nodes[n].data[1].h2;
+  }
+
+  uint32_t incr_fanout_size_pin( node const& n, uint32_t pin_index ) const
+  {
+    return _storage->nodes[n].data[2 + pin_index].h2++;
+  }
+
+  uint32_t decr_fanout_size_pin( node const& n, uint32_t pin_index ) const
+  {
+    return --_storage->nodes[n].data[2 + pin_index].h2;
+  }
+
+  uint32_t fanout_size_pin( node const& n, uint32_t pin_index ) const
+  {
+    return _storage->nodes[n].data[2 + pin_index].h2; /* h2 is the per-pin fan-out; h1 is the function literal */
+  }
+
+  bool is_function( node const& n ) const
+  {
+    return n > 1 && !is_ci( n );
+  }
+
+  bool is_and( node const& n ) const
+  {
+    return n > 1 && _storage->nodes[n].data.size() == 3 && _storage->nodes[n].data[2].h1 == 4;
+  }
+
+  bool is_and( signal const& f ) const
+  {
+    return f.index > 1 && _storage->nodes[f.index].data[2 + f.output].h1 == 4; /* tests only the output pin addressed by f, so multi-output nodes qualify */
+  }
+
+  bool is_or( node const& n ) const
+  {
+    return n > 1 && _storage->nodes[n].data.size() == 3 && _storage->nodes[n].data[2].h1 == 6; /* single-output node (3 data words) whose function literal is 6 */
+  }
+
+  bool is_or( signal const& f ) const
+  {
+    return f.index > 1 && _storage->nodes[f.index].data[2 + f.output].h1 == 6; /* tests only the output pin addressed by f */
+  }
+
+  bool is_xor( node const& n ) const
+  {
+    return n > 1 && _storage->nodes[n].data.size() == 3 && _storage->nodes[n].data[2].h1 == 12; /* single-output node (3 data words) whose function literal is 12 */
+  }
+
+  bool is_xor( signal const& f ) const
+  {
+    return f.index > 1 && _storage->nodes[f.index].data[2 + f.output].h1 == 12; /* tests only the output pin addressed by f */
+  }
+
+  bool is_maj( node const& n ) const
+  {
+    return n > 1 && _storage->nodes[n].data.size() == 3 && _storage->nodes[n].data[2].h1 == 14; /* single-output node (3 data words) whose function literal is 14 */
+  }
+
+  bool is_maj( signal const& f ) const
+  {
+    return f.index > 1 && _storage->nodes[f.index].data[2 + f.output].h1 == 14; /* tests only the output pin addressed by f */
+  }
+
+  bool is_ite( node const& n ) const
+  {
+    return n > 1 && _storage->nodes[n].data.size() == 3 && _storage->nodes[n].data[2].h1 == 16; /* single-output node (3 data words) whose function literal is 16 */
+  }
+
+  bool is_ite( signal const& f ) const
+  {
+    return f.index > 1 && _storage->nodes[f.index].data[2 + f.output].h1 == 16; /* tests only the output pin addressed by f */
+  }
+
+  bool is_xor3( node const& n ) const
+  {
+    return n > 1 && _storage->nodes[n].data.size() == 3 && _storage->nodes[n].data[2].h1 == 18; /* single-output node (3 data words) whose function literal is 18 */
+  }
+
+  bool is_xor3( signal const& f ) const
+  {
+    return f.index > 1 && _storage->nodes[f.index].data[2 + f.output].h1 == 18; /* tests only the output pin addressed by f */
+  }
+#pragma endregion
+
+#pragma region Functional properties
+  kitty::dynamic_truth_table node_function( const node& n ) const
+  {
+    return _storage->data.cache[_storage->nodes[n].data[2].h1]; /* function of output pin 0: its literal (h1) indexes the truth-table cache */
+  }
+
+  kitty::dynamic_truth_table node_function_pin( const node& n, uint32_t pin_index ) const
+  {
+    return _storage->data.cache[_storage->nodes[n].data[2 + pin_index].h1]; /* as node_function(), but for the chosen output pin; pin_index is not range-checked */
+  }
+#pragma endregion
+
+#pragma region Nodes and signals
+  node get_node( signal const& f ) const
+  {
+    return f.index; /* drops the complement and output-pin fields of the signal */
+  }
+
+  signal make_signal( node const& n ) const
+  {
+    return { n, 0 }; /* second initializer is 0; presumably the complement flag as in next_output_pin() - signal type is defined outside this view */
+  }
+
+  signal make_signal( node const& n, uint32_t output_pin ) const
+  {
+    return { n, 0, output_pin }; /* same initializer order as next_output_pin(): index, complement (0), output pin */
+  }
+
+  bool is_complemented( signal const& f ) const
+  {
+    return f.complement ? true : false; /* explicit conversion of the complement field to bool */
+  }
+
+  uint32_t get_output_pin( signal const& f ) const
+  {
+    return static_cast<uint32_t>( f.output ); /* output-pin field of the signal, widened/narrowed to 32 bits */
+  }
+
+  signal next_output_pin( signal const& f ) const
+  {
+    return { f.index, f.complement, f.output + 1 }; /* same node and complement, following pin; no check against num_outputs() */
+  }
+
+  uint32_t node_to_index( node const& n ) const
+  {
+    return static_cast<uint32_t>( n ); /* a node is its own index; only the width changes */
+  }
+
+  node index_to_node( uint32_t index ) const
+  {
+    return index; /* inverse of node_to_index(); no range check */
+  }
+
+  node ci_at( uint32_t index ) const
+  {
+    assert( index < _storage->inputs.size() ); /* range is only checked when assertions are enabled */
+    return *( _storage->inputs.begin() + index ); /* index-th entry of the input list */
+  }
+
+  signal co_at( uint32_t index ) const
+  {
+    assert( index < _storage->outputs.size() ); /* range is only checked when assertions are enabled */
+    return *( _storage->outputs.begin() + index ); /* index-th entry of the output list, converted to signal */
+  }
+
+  node pi_at( uint32_t index ) const
+  {
+    assert( index < _storage->inputs.size() ); /* range is only checked when assertions are enabled */
+    return *( _storage->inputs.begin() + index ); /* same lookup as ci_at() */
+  }
+
+  signal po_at( uint32_t index ) const
+  {
+    assert( index < _storage->outputs.size() ); /* range is only checked when assertions are enabled */
+    return *( _storage->outputs.begin() + index ); /* same lookup as co_at() */
+  }
+#pragma endregion
+
+#pragma region Node and signal iterators
+  template<typename Fn>
+  void foreach_node( Fn&& fn ) const /* calls fn for every node index, starting at 0 */
+  {
+    auto r = range<uint64_t>( _storage->nodes.size() ); /* index range [0, number of nodes) */
+    detail::foreach_element( r.begin(), r.end(), fn );
+  }
+
+  template<typename Fn>
+  void foreach_ci( Fn&& fn ) const /* calls fn for every entry of the input list, in stored order */
+  {
+    detail::foreach_element( _storage->inputs.begin(), _storage->inputs.end(), fn );
+  }
+
+  template<typename Fn>
+  void foreach_co( Fn&& fn ) const /* calls fn for every entry of the output list, in stored order */
+  {
+    using IteratorType = decltype( _storage->outputs.begin() );
+    detail::foreach_element_transform<IteratorType, signal>(
+        _storage->outputs.begin(), _storage->outputs.end(), []( auto f ) { return signal( f ); }, fn ); /* each stored output is converted to `signal` before fn sees it */
+  }
+
+  template<typename Fn>
+  void foreach_pi( Fn&& fn ) const /* same traversal as foreach_ci() */
+  {
+    detail::foreach_element( _storage->inputs.begin(), _storage->inputs.end(), fn );
+  }
+
+  template<typename Fn>
+  void foreach_po( Fn&& fn ) const /* same traversal as foreach_co() */
+  {
+    using IteratorType = decltype( _storage->outputs.begin() );
+    detail::foreach_element_transform<IteratorType, signal>(
+        _storage->outputs.begin(), _storage->outputs.end(), []( auto f ) { return signal( f ); }, fn ); /* each stored output is converted to `signal` before fn sees it */
+  }
+
+  template<typename Fn>
+  void foreach_gate( Fn&& fn ) const /* calls fn for every node index that is neither a constant nor an input */
+  {
+    auto r = range<uint64_t>( 2u, _storage->nodes.size() ); /* start from 2 to avoid constants */
+    detail::foreach_element_if(
+        r.begin(), r.end(),
+        [this]( auto n ) { return !is_ci( n ); }, /* filter: combinational inputs are skipped */
+        fn );
+  }
+
+  template<typename Fn>
+  void foreach_fanin( node const& n, Fn&& fn ) const
+  {
+    if ( n != 0 && !is_ci( n ) ) /* node 0 and combinational inputs have no fanin to visit */
+    {
+      auto& fanins = _storage->nodes[n].children;
+      using IteratorType = decltype( fanins.begin() );
+      detail::foreach_element_transform<IteratorType, signal>( fanins.begin(), fanins.end(), []( auto f ) { return signal( f ); }, fn );
+    }
+  }
+#pragma endregion
+
+#pragma region Simulate values // (Works on single-output gates only)
+  template<typename Iterator>
+  iterates_over_t<Iterator, bool>
+  compute( node const& n, Iterator begin, Iterator end ) const
+  {
+    auto const& nd = _storage->nodes[n];
+    auto fanin = nd.children.begin();
+    uint32_t minterm{ 0 }; /* built MSB-first: the first fanin value ends up in the highest bit */
+    for ( ; begin != end; ++begin, ++fanin )
+    {
+      bool const inverted = ( fanin->weight & 1 ) != 0; /* low weight bit flips the incoming value */
+      minterm = ( minterm << 1 ) ^ ( ( static_cast<bool>( *begin ) != inverted ) ? 1u : 0u );
+    }
+    return kitty::get_bit( _storage->data.cache[nd.data[2].h1], minterm ); /* pin-0 function evaluated at the minterm */
+  }
+
+  template<typename Iterator>
+  iterates_over_truth_table_t<Iterator>
+  compute( node const& n, Iterator begin, Iterator end ) const
+  {
+    using truth_table_t = typename std::iterator_traits<Iterator>::value_type;
+    auto const& nd = _storage->nodes[n];
+    auto const num_fanins = nd.children.size();
+
+    /* private copy of the fanin functions, so polarities can be applied in place */
+    std::vector<truth_table_t> fanin_tts( begin, end );
+    assert( num_fanins != 0 );
+    assert( fanin_tts.size() == num_fanins );
+
+    /* a set low weight bit marks a fanin that is read complemented */
+    for ( auto k = 0u; k < num_fanins; ++k )
+    {
+      if ( nd.children[k].weight & 1 )
+        fanin_tts[k] = ~fanin_tts[k];
+    }
+
+    /* the output is constructed from the first fanin table, hence shares its size */
+    auto output = fanin_tts.front().construct();
+    auto const gate_function = _storage->data.cache[nd.data[2].h1];
+    auto const num_bits = static_cast<uint32_t>( output.num_bits() );
+
+    for ( uint32_t bit = 0u; bit < num_bits; ++bit )
+    {
+      /* fanin k supplies bit k of the minterm looked up in the gate function */
+      uint32_t minterm = 0u;
+      for ( auto k = 0u; k < num_fanins; ++k )
+        minterm |= kitty::get_bit( fanin_tts[k], bit ) << k;
+      if ( kitty::get_bit( gate_function, minterm ) )
+        kitty::set_bit( output, bit );
+    }
+
+    return output;
+  }
+#pragma endregion
+
+#pragma region Custom node values
+  void clear_values() const
+  {
+    std::for_each( _storage->nodes.begin(), _storage->nodes.end(), []( auto& n ) { n.data[0].h1 = 0; } ); /* resets the custom value (data[0].h1) of every node */
+  }
+
+  uint32_t value( node const& n ) const
+  {
+    return _storage->nodes[n].data[0].h1; /* custom per-node value, stored in data[0].h1 */
+  }
+
+  void set_value( node const& n, uint32_t v ) const
+  {
+    _storage->nodes[n].data[0].h1 = v; /* const method: the write goes through the shared storage pointer */
+  }
+
+  uint32_t incr_value( node const& n ) const
+  {
+    return static_cast<uint32_t>( _storage->nodes[n].data[0].h1++ ); /* post-increment: returns the value BEFORE the increment */
+  }
+
+  uint32_t decr_value( node const& n ) const
+  {
+    return static_cast<uint32_t>( --_storage->nodes[n].data[0].h1 ); /* pre-decrement: returns the value AFTER the decrement */
+  }
+#pragma endregion
+
+#pragma region Visited flags
+  void clear_visited() const
+  {
+    std::for_each( _storage->nodes.begin(), _storage->nodes.end(), []( auto& n ) { n.data[1].h1 = 0; } ); /* resets the visited word (data[1].h1) of every node; the fanout count in h2 is untouched */
+  }
+
+  auto visited( node const& n ) const
+  {
+    return _storage->nodes[n].data[1].h1; /* visited word, stored in the lower half of data[1] */
+  }
+
+  void set_visited( node const& n, uint32_t v ) const
+  {
+    _storage->nodes[n].data[1].h1 = v; /* v is an arbitrary 32-bit mark, e.g. a value obtained from trav_id() */
+  }
+
+  uint32_t trav_id() const
+  {
+    return _storage->trav_id; /* current traversal counter kept in the storage */
+  }
+
+  void incr_trav_id() const
+  {
+    ++_storage->trav_id; /* advances the traversal counter by one */
+  }
+#pragma endregion
+
+#pragma region General methods
+  auto& events() const
+  {
+    return *_events; /* reference to the object owned by the _events shared pointer */
+  }
+#pragma endregion
+
+public:
+  std::shared_ptr<block_storage> _storage; /* nodes, inputs, outputs, traversal id and function cache used by the methods above */
+  std::shared_ptr<network_events<base_type>> _events; /* object handed out by events() */
+};
+
+} // namespace mockturtle
diff --git a/include/mockturtle/networks/klut.hpp b/include/mockturtle/networks/klut.hpp
index 1aae3d4fe..96a7a6f81 100644
--- a/include/mockturtle/networks/klut.hpp
+++ b/include/mockturtle/networks/klut.hpp
@@ -27,6 +27,7 @@
   \file klut.hpp
   \brief k-LUT logic network implementation
 
+  \author Alessandro Tempia Calvino
   \author Andrea Costamagna
   \author Heinz Riener
   \author Marcel Walter
@@ -114,7 +115,7 @@ class klut_network
   inline void _init()
   {
     /* already initialized */
-    if ( _storage->nodes.size() > 1 ) 
+    if ( _storage->nodes.size() > 1 )
       return;
 
     /* reserve the second node for constant 1 */
@@ -458,6 +459,16 @@ class klut_network
     return _storage->nodes[n].data[0].h1;
   }
 
+  uint32_t incr_fanout_size( node const& n ) const
+  {
+    return _storage->nodes[n].data[0].h1++; /* post-increment of data[0].h1 (presumably the k-LUT fanout counter): returns the old value */
+  }
+
+  uint32_t decr_fanout_size( node const& n ) const
+  {
+    return --_storage->nodes[n].data[0].h1; /* pre-decrement of data[0].h1 (presumably the k-LUT fanout counter): returns the new value */
+  }
+
   bool is_function( node const& n ) const
   {
     return n > 1 && !is_ci( n );
@@ -601,7 +612,7 @@ class klut_network
   {
     const auto nfanin = _storage->nodes[n].children.size();
 
-    std::vector<typename Iterator::value_type> tts( begin, end );
+    std::vector<typename std::iterator_traits<Iterator>::value_type> tts( begin, end );
 
     assert( nfanin != 0 );
     assert( tts.size() == nfanin );
diff --git a/include/mockturtle/networks/storage.hpp b/include/mockturtle/networks/storage.hpp
index b6493641e..85a2b42f6 100644
--- a/include/mockturtle/networks/storage.hpp
+++ b/include/mockturtle/networks/storage.hpp
@@ -145,6 +145,20 @@ struct mixed_fanin_node
   }
 };
 
+template<int PointerFieldSize = 0>
+struct block_fanin_node /* node record with a variable number of fanins and of data words */
+{
+  using pointer_type = node_pointer<PointerFieldSize>;
+
+  std::vector<pointer_type> children; /* fanin pointers */
+  std::vector<cauint64_t> data; /* per-node payload words; their meaning is defined by the network using this node type */
+
+  bool operator==( block_fanin_node<PointerFieldSize> const& other ) const
+  {
+    return children == other.children; /* equality looks at the fanins only; `data` is ignored */
+  }
+};
+
 /*! \brief Hash function for 64-bit word */
 inline uint64_t hash_block( uint64_t word )
 {
diff --git a/include/mockturtle/traits.hpp b/include/mockturtle/traits.hpp
index 36e788a68..29a4738a1 100644
--- a/include/mockturtle/traits.hpp
+++ b/include/mockturtle/traits.hpp
@@ -1,5 +1,5 @@
 /* mockturtle: C++ logic network library
- * Copyright (C) 2018-2022  EPFL
+ * Copyright (C) 2018-2023  EPFL
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -288,6 +288,21 @@ template<class Ntk>
 inline constexpr bool has_is_ro_v = has_is_ro<Ntk>::value;
 #pragma endregion
 
+#pragma region has_is_multioutput
+template<class Ntk, class = void>
+struct has_is_multioutput : std::false_type /* fallback: chosen when the call expression below is ill-formed */
+{
+};
+
+template<class Ntk>
+struct has_is_multioutput<Ntk, std::void_t<decltype( std::declval<Ntk>().is_multioutput( std::declval<node<Ntk>>() ) )>> : std::true_type /* selected when Ntk offers is_multioutput( node ) */
+{
+};
+
+template<class Ntk>
+inline constexpr bool has_is_multioutput_v = has_is_multioutput<Ntk>::value; /* shorthand for the trait's value */
+#pragma endregion
+
 #pragma region has_constant_value
 template<class Ntk, class = void>
 struct has_constant_value : std::false_type
@@ -948,6 +963,21 @@ template<class Ntk>
 inline constexpr bool has_fanin_size_v = has_fanin_size<Ntk>::value;
 #pragma endregion
 
+#pragma region has_num_outputs
+template<class Ntk, class = void>
+struct has_num_outputs : std::false_type /* fallback: chosen when the call expression below is ill-formed */
+{
+};
+
+template<class Ntk>
+struct has_num_outputs<Ntk, std::void_t<decltype( std::declval<Ntk>().num_outputs( std::declval<node<Ntk>>() ) )>> : std::true_type /* selected when Ntk offers num_outputs( node ) */
+{
+};
+
+template<class Ntk>
+inline constexpr bool has_num_outputs_v = has_num_outputs<Ntk>::value; /* shorthand for the trait's value */
+#pragma endregion
+
 #pragma region has_fanout_size
 template<class Ntk, class = void>
 struct has_fanout_size : std::false_type
@@ -1428,6 +1458,21 @@ template<class Ntk>
 inline constexpr bool has_node_function_v = has_node_function<Ntk>::value;
 #pragma endregion
 
+#pragma region has_node_function_pin
+template<class Ntk, class = void>
+struct has_node_function_pin : std::false_type /* fallback: chosen when the call expression below is ill-formed */
+{
+};
+
+template<class Ntk>
+struct has_node_function_pin<Ntk, std::void_t<decltype( std::declval<Ntk>().node_function_pin( std::declval<node<Ntk>>(), uint32_t() ) )>> : std::true_type /* selected when Ntk offers node_function_pin( node, uint32_t ) */
+{
+};
+
+template<class Ntk>
+inline constexpr bool has_node_function_pin_v = has_node_function_pin<Ntk>::value; /* shorthand for the trait's value */
+#pragma endregion
+
 #pragma region has_get_node
 template<class Ntk, class = void>
 struct has_get_node : std::false_type
@@ -1458,6 +1503,21 @@ template<class Ntk>
 inline constexpr bool has_make_signal_v = has_make_signal<Ntk>::value;
 #pragma endregion
 
+#pragma region has_get_output_pin
+template<class Ntk, class = void>
+struct has_get_output_pin : std::false_type /* fallback: chosen when the call expression below is ill-formed */
+{
+};
+
+template<class Ntk>
+struct has_get_output_pin<Ntk, std::void_t<decltype( std::declval<Ntk>().get_output_pin( std::declval<signal<Ntk>>() ) )>> : std::true_type /* selected when Ntk offers get_output_pin( signal ) */
+{
+};
+
+template<class Ntk>
+inline constexpr bool has_get_output_pin_v = has_get_output_pin<Ntk>::value; /* shorthand for the trait's value */
+#pragma endregion
+
 #pragma region has_is_complemented
 template<class Ntk, class = void>
 struct has_is_complemented : std::false_type
@@ -2088,6 +2148,51 @@ template<class Ntk>
 inline constexpr bool has_get_binding_index_v = has_get_binding_index<Ntk>::value;
 #pragma endregion
 
+#pragma region has_add_binding
+template<class Ntk, class = void>
+struct has_add_binding : std::false_type /* fallback: chosen when the call expression below is ill-formed */
+{
+};
+
+template<class Ntk>
+struct has_add_binding<Ntk, std::void_t<decltype( std::declval<Ntk>().add_binding( std::declval<node<Ntk>>(), uint32_t() ) )>> : std::true_type /* selected when Ntk offers add_binding( node, uint32_t ) */
+{
+};
+
+template<class Ntk>
+inline constexpr bool has_add_binding_v = has_add_binding<Ntk>::value; /* shorthand for the trait's value */
+#pragma endregion
+
+#pragma region has_select_dont_touch
+template<class Ntk, class = void>
+struct has_select_dont_touch : std::false_type /* fallback: chosen when the call expression below is ill-formed */
+{
+};
+
+template<class Ntk>
+struct has_select_dont_touch<Ntk, std::void_t<decltype( std::declval<Ntk>().select_dont_touch( std::declval<node<Ntk>>() ) )>> : std::true_type /* selected when Ntk offers select_dont_touch( node ) */
+{
+};
+
+template<class Ntk>
+inline constexpr bool has_select_dont_touch_v = has_select_dont_touch<Ntk>::value; /* shorthand for the trait's value */
+#pragma endregion
+
+#pragma region has_is_dont_touch
+template<class Ntk, class = void>
+struct has_is_dont_touch : std::false_type /* fallback: chosen when the call expression below is ill-formed */
+{
+};
+
+template<class Ntk>
+struct has_is_dont_touch<Ntk, std::void_t<decltype( std::declval<Ntk>().is_dont_touch( std::declval<node<Ntk>>() ) )>> : std::true_type /* selected when Ntk offers is_dont_touch( node ) */
+{
+};
+
+template<class Ntk>
+inline constexpr bool has_is_dont_touch_v = has_is_dont_touch<Ntk>::value; /* shorthand for the trait's value */
+#pragma endregion
+
 #pragma region has_clear_values
 template<class Ntk, class = void>
 struct has_clear_values : std::false_type
diff --git a/include/mockturtle/utils/algorithm.hpp b/include/mockturtle/utils/algorithm.hpp
index 631b2141d..6397fff25 100644
--- a/include/mockturtle/utils/algorithm.hpp
+++ b/include/mockturtle/utils/algorithm.hpp
@@ -27,6 +27,7 @@
   \file algorithm.hpp
   \brief STL-like algorithm extensions
 
+  \author Alessandro Tempia Calvino
   \author Heinz Riener
   \author Marcel Walter
   \author Mathias Soeken
@@ -142,4 +143,88 @@ constexpr auto range( T end )
   return range<T>( {}, end );
 }
 
+/*! \brief Performs the set union of two sorted sets.
+ *
+ * Unlike std::set_union, never writes more than `limit` elements to
+ * `result`. Returns the number of elements of the union when it fits
+ * within `limit`; otherwise returns -1, in which case `result` may
+ * already hold a partial merge and must be ignored.
+ *
+ * Elements are compared with `<` and `!=` only, and equal heads of the
+ * two ranges are emitted once.
+ *
+ * Special case: when both ranges already contain `limit` elements, the
+ * union fits only if the ranges are identical, which is checked with a
+ * single linear scan.
+ *
+ * \param first1 Begin of the first sorted range
+ * \param last1 End of the first sorted range
+ * \param first2 Begin of the second sorted range
+ * \param last2 End of the second sorted range
+ * \param result Begin of the output range; at most `limit` elements
+ *        are written to it
+ * \param limit Maximum number of elements allowed in the union
+ * \return Size of the union, or -1 if it exceeds `limit`
+ *
+ */
+template<class InputIterator1, class InputIterator2, class OutputIterator>
+int32_t set_union_safe( InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, OutputIterator result, uint32_t limit )
+{
+  /* fast path: two sets of maximal size only merge if they are equal */
+  if ( std::distance( first1, last1 ) == limit && std::distance( first2, last2 ) == limit )
+  {
+    for ( ; first1 != last1; ++first1, ++first2, ++result )
+    {
+      if ( *first1 != *first2 )
+        return -1;
+      *result = *first1;
+    }
+    return static_cast<int32_t>( limit );
+  }
+
+  uint32_t count = 0;
+
+  /* once one range is exhausted, the rest of the other one is appended
+     in bulk, provided that it still fits */
+  const auto append_tail = [&]( auto tail_begin, auto tail_end ) -> int32_t {
+    count += std::distance( tail_begin, tail_end );
+    if ( count > limit )
+      return -1;
+    std::copy( tail_begin, tail_end, result );
+    return static_cast<int32_t>( count );
+  };
+
+  for ( ; count < limit; ++count, ++result )
+  {
+    if ( first1 == last1 )
+      return append_tail( first2, last2 );
+    if ( first2 == last2 )
+      return append_tail( first1, last1 );
+
+    /* classic merge step: emit the smaller head, collapse equal heads */
+    if ( *first1 < *first2 )
+    {
+      *result = *first1;
+      ++first1;
+    }
+    else if ( *first2 < *first1 )
+    {
+      *result = *first2;
+      ++first2;
+    }
+    else
+    {
+      *result = *first1;
+      ++first1;
+      ++first2;
+    }
+  }
+
+  /* `limit` elements written: any leftover element would be a new one */
+  if ( first1 != last1 || first2 != last2 )
+    return -1;
+
+  return static_cast<int32_t>( count );
+}
+
 } /* namespace mockturtle */
\ No newline at end of file
diff --git a/include/mockturtle/utils/cuts.hpp b/include/mockturtle/utils/cuts.hpp
index 5eef5d171..0c82dec18 100644
--- a/include/mockturtle/utils/cuts.hpp
+++ b/include/mockturtle/utils/cuts.hpp
@@ -27,6 +27,7 @@
   \file cuts.hpp
   \brief Data structure for cuts
 
+  \author Alessandro Tempia Calvino
   \author Heinz Riener
   \author Mathias Soeken
 */
@@ -40,6 +41,8 @@
 
 #include <kitty/detail/mscfix.hpp>
 
+#include "algorithm.hpp"
+
 namespace mockturtle
 {
 
@@ -299,10 +302,10 @@ bool cut<MaxLeaves, T>::merge( cut const& that, cut& res, uint32_t cut_size ) co
     }
   }
 
-  auto it = std::set_union( begin(), end(), that.begin(), that.end(), res.begin() );
-  if ( auto length = std::distance( res.begin(), it ); length <= cut_size )
+  int32_t length = set_union_safe( begin(), end(), that.begin(), that.end(), res.begin(), cut_size );
+  if ( length >= 0 )
   {
-    res._cend = res._end = it;
+    res._cend = res._end = res.begin() + length;
     res._length = static_cast<uint32_t>( length );
     res._signature = _signature | that._signature;
     return true;
diff --git a/include/mockturtle/utils/include/supergate.hpp b/include/mockturtle/utils/include/supergate.hpp
new file mode 100644
index 000000000..2ff55c000
--- /dev/null
+++ b/include/mockturtle/utils/include/supergate.hpp
@@ -0,0 +1,92 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2023  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file supergate.hpp
+  \brief Defines the composed gate and supergate data structure.
+
+  \author Alessandro Tempia Calvino
+*/
+
+#pragma once
+
+#include <array>
+#include <vector>
+
+#include "../../io/genlib_reader.hpp"
+
+#include <kitty/dynamic_truth_table.hpp>
+
+namespace mockturtle
+{
+
+template<unsigned NInputs>
+struct composed_gate
+{
+  /* unique ID (zero-initialized like every other member, so a default-constructed gate has no indeterminate field) */
+  uint32_t id{ 0 };
+
+  /* gate is a supergate */
+  bool is_super{ false };
+
+  /* pointer to the root library gate (not owned; null until assigned) */
+  gate const* root{ nullptr };
+
+  /* support of the composed gate */
+  uint32_t num_vars{ 0 };
+
+  /* function */
+  kitty::dynamic_truth_table function;
+
+  /* area */
+  double area{ 0.0 };
+
+  /* pin-to-pin delays, one slot per possible input (NInputs slots, zero-initialized) */
+  std::array<float, NInputs> tdelay{};
+
+  /* fanin gates (non-owning pointers to other composed gates) */
+  std::vector<composed_gate<NInputs>*> fanin{};
+};
+
+template<unsigned NInputs>
+struct supergate
+{
+  /* pointer to the root gate (not owned; null by default) */
+  composed_gate<NInputs> const* root{};
+
+  /* area */
+  double area{ 0.0 };
+
+  /* pin-to-pin delay, one slot per possible input (NInputs slots, zero-initialized) */
+  std::array<float, NInputs> tdelay{};
+
+  /* np permutation vector */
+  std::vector<uint8_t> permutation{};
+
+  /* pin negations (16-bit mask) */
+  uint16_t polarity{ 0 };
+};
+
+} // namespace mockturtle
\ No newline at end of file
diff --git a/include/mockturtle/utils/standard_cell.hpp b/include/mockturtle/utils/standard_cell.hpp
new file mode 100644
index 000000000..917550dc0
--- /dev/null
+++ b/include/mockturtle/utils/standard_cell.hpp
@@ -0,0 +1,98 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2023  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file standard_cell.hpp
+  \brief Defines logic cells.
+
+  \author Alessandro Tempia Calvino
+*/
+
+#pragma once
+
+#include <unordered_map>
+#include <vector>
+
+#include "../io/genlib_reader.hpp"
+
+namespace mockturtle
+{
+
+struct standard_cell
+{
+  /* Unique name */
+  std::string name;
+
+  /* Unique ID */
+  uint32_t id;
+
+  /* Gates stored by value, one for each individual output */
+  std::vector<gate> gates;
+
+  /* Area */
+  double area;
+};
+
+/*! \brief Reconstruct standard cells from GENLIB gates.
+ *
+ * This function returns a vector of standard cells given
+ * GENLIB gates.
+ *
+   \verbatim embed:rst
+
+   Example
+
+   .. code-block:: c++
+
+      std::vector<gate> gates;
+      lorina::read_genlib( in, genlib_reader( gates ) );
+
+      // Extract standard cells
+      std::vector<standard_cell> cells = get_standard_cells( gates );
+   \endverbatim
+ */
+inline std::vector<standard_cell> get_standard_cells( std::vector<gate> const& gates )
+{
+  std::unordered_map<std::string, uint32_t> name_to_index;
+  std::vector<standard_cell> cells;
+
+  for ( gate const& g : gates )
+  {
+    /* one lookup: registers the next free cell index only if the name is new */
+    const auto next_id = static_cast<uint32_t>( cells.size() );
+    if ( const auto [it, is_new] = name_to_index.try_emplace( g.name, next_id ); is_new )
+    {
+      cells.push_back( standard_cell{ g.name, next_id, { g }, g.area } ); /* first gate with this name: its area becomes the cell area */
+    }
+    else
+    {
+      cells[it->second].gates.push_back( g ); /* add to existing cell (multi-output) */
+    }
+  }
+
+  return cells;
+}
+
+} // namespace mockturtle
\ No newline at end of file
diff --git a/include/mockturtle/utils/struct_library.hpp b/include/mockturtle/utils/struct_library.hpp
new file mode 100644
index 000000000..0eb0ca8e6
--- /dev/null
+++ b/include/mockturtle/utils/struct_library.hpp
@@ -0,0 +1,1470 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2023  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file struct_library.hpp
+  \brief Implements utilities for structural matching
+
+  \author Alessandro Tempia Calvino
+  \author Gianluca Radi
+*/
+
+#pragma once
+
+#include <algorithm>
+#include <cassert>
+#include <numeric>
+#include <tuple>
+#include <unordered_map>
+#include <vector>
+
+#include <kitty/constructors.hpp>
+#include <kitty/decomposition.hpp>
+#include <kitty/dynamic_truth_table.hpp>
+#include <kitty/npn.hpp>
+#include <kitty/print.hpp>
+#include <kitty/static_truth_table.hpp>
+
+#include <parallel_hashmap/phmap.h>
+
+#include "../io/genlib_reader.hpp"
+#include "include/supergate.hpp"
+
+namespace mockturtle
+{
+
+/*! \brief Library of gates for structural matching
+ *
+ * This class creates a technology library from a set
+ * of input gates.
+ *
+ * Gates are processed to derive rules in the AIG format.
+ * Then, every rule and subrule gets a unique id and the AND table is built.
+ * Every gate gets a unique label comprehensive of its rule id and whether it is positive or negative.
+ *
+ * The template parameter `NInputs` selects the maximum number of variables
+ * allowed for a gate in the library.
+ *
+ * By default, `struct_library` is used in `tech_library` when NInputs is greater than 6.
+ *
+ *
+   \verbatim embed:rst
+
+   Example
+
+   .. code-block:: c++
+
+      std::vector<gate> gates;
+      lorina::read_genlib( "file.genlib", genlib_reader( gates ) );
+      // struct library
+      mockturtle::struct_library lib( gates );
+   \endverbatim
+ */
+
+template<unsigned NInputs = 9u>
+class struct_library
+{
+public:
+  enum class node_type /* kind tag stored in every dsd_node */
+  {
+    none,
+    zero_,
+    pi_, /* used for the input nodes that seed every rule */
+    and_,
+    or_,
+    mux_,
+    xor_
+  };
+
+  struct signal /* 32-bit literal: complement flag plus node index */
+  {
+    union
+    {
+      struct
+      {
+        uint32_t inv : 1; /* complement flag (first declared bit-field) */
+        uint32_t index : 31; /* node index */
+      };
+      uint32_t data; /* the same 32 bits viewed as one word; used for comparison and hashing */
+    };
+
+    bool operator==( signal const& other ) const
+    {
+      return data == other.data; /* equal only if both index and complement flag match */
+    }
+  };
+
+  /*  struct for representing nodes in dsd decomposition */
+  struct dsd_node
+  {
+    node_type type; /* operator implemented by this node */
+
+    uint32_t index; /* position identifier of the node; PIs are created with their variable number */
+
+    std::vector<signal> fanin = {}; /* children as (index, complement) literals; empty for PIs */
+  };
+
+  /* struct for labels to assign to gates */
+  struct label
+  {
+    union
+    {
+      struct
+      {
+        uint32_t inv : 1; /* polarity flag (first declared bit-field) */
+        uint32_t index : 31; /* label index */
+      };
+      uint32_t data; /* the same 32 bits viewed as one word */
+    };
+    bool operator==( label const& other ) const
+    {
+      return data == other.data; /* equal only if both index and polarity match */
+    }
+  };
+
+  struct signal_hash /* hash functor for `signal` */
+  {
+    std::size_t operator()( signal const& s ) const noexcept
+    {
+      return std::hash<uint32_t>{}( s.data ); /* hashes the packed word, so index and complement both contribute */
+    }
+  };
+
+  struct tuple_s_hash
+  {
+    std::size_t operator()( std::tuple<signal, signal> const& t ) const noexcept
+    {
+      uint64_t const low = signal_hash{}( std::get<0>( t ) );
+      uint64_t const high = signal_hash{}( std::get<1>( t ) );
+      return low ^ ( high << 32 ); /* second hash is moved to the upper word before mixing; or use boost::hash_combine */
+    }
+  };
+
+private:
+  static constexpr uint32_t invalid_index = UINT32_MAX;
+  using supergates_list_t = std::vector<supergate<NInputs>>;
+  using composed_list_t = std::vector<composed_gate<NInputs>>;
+  using lib_rule = phmap::flat_hash_map<kitty::dynamic_truth_table, std::vector<dsd_node>, kitty::hash<kitty::dynamic_truth_table>>; /* gate function -> DSD rule */
+  using rule = std::vector<dsd_node>; /* a rule is a flat list of DSD nodes */
+  using lib_table = phmap::flat_hash_map<std::tuple<signal, signal>, uint32_t, tuple_s_hash>; /* pair of child literals -> pattern id */
+  using map_label_gate = std::unordered_map<uint32_t, supergates_list_t>; /* packed label word -> gates carrying that label */
+
+public:
+  explicit struct_library( std::vector<gate> const& gates ) /* only stores the gates; call construct() to build the patterns */
+      : _gates( gates ),
+        _supergates(),
+        _dsd_map(),
+        _and_table(),
+        _label_to_gate()
+  {}
+
+public:
+  /*! \brief Construct the structural library.
+   *
+   * Generates the patterns for structural matching.
+   * Variable `min_vars` defines the minimum number of
+   * gate inputs considered for the library creation.
+   * 0 < min_vars < UINT32_MAX
+   */
+  void construct( uint32_t min_vars = 2u, bool verbose = false )
+  {
+    generate_library( min_vars, verbose ); /* all work is delegated; `verbose` enables printing of the derived rules */
+  }
+
+  /*! \brief Get the structural library.
+   *
+   * Returns the map from pattern labels to the gates that match them.
+   */
+  const map_label_gate& get_struct_library() const
+  {
+    return _label_to_gate; /* reference to the internal map, valid as long as this object lives */
+  }
+
+  /*! \brief Get the pattern ID.
+   *
+   * Looks up the AND table with a key made of the two children IDs;
+   * the child with the smaller index is placed first in the key.
+   *
+   *  \param id1 first pattern id.
+   *  \param id2 second pattern id.
+   *  \return the pattern ID if found, UINT32_MAX otherwise.
+   *
+   * This function works with only AND operators.
+   */
+  uint32_t get_pattern_id( uint32_t id1, uint32_t id2 ) const
+  {
+    signal l, r;
+    /* ignore input negations: literal 3 is folded onto literal 2 */
+    l.data = ( id1 == 3u ) ? 2u : id1;
+    r.data = ( id2 == 3u ) ? 2u : id2;
+
+    /* on equal indices the argument order is kept */
+    const auto key = ( l.index <= r.index ) ? std::make_tuple( l, r ) : std::make_tuple( r, l );
+
+    /* a missing entry means no pattern has these two children */
+    if ( const auto match = _and_table.find( key ); match != _and_table.end() )
+      return match->second;
+
+    return UINT32_MAX;
+  }
+
+  /*! \brief Get the gates matching the pattern ID.
+   *
+   * Returns a list of gates that match the pattern ID.
+   */
+  const supergates_list_t* get_supergates_pattern( uint32_t id, bool phase ) const
+  {
+    auto match = _label_to_gate.find( ( id << 1 ) | ( phase ? 1 : 0 ) ); /* key packs the id above a one-bit phase flag */
+    if ( match != _label_to_gate.end() )
+    {
+      return &( match->second ); /* points into the internal map; not owned by the caller */
+    }
+    return nullptr; /* no gate carries this label */
+  }
+
+  /*! \brief Print and table.
+   *
+   * Writes one line per entry to std::cout: `<[!]index0, [!]index1> id`.
+   */
+  void print_and_table() const
+  {
+    for ( auto const& elem : _and_table ) /* by reference: entries are only read */
+    {
+      auto const& first0 = std::get<0>( elem.first );
+      auto const& first1 = std::get<1>( elem.first );
+      std::cout << "<" << ( first0.inv ? "!" : "" ) << first0.index;
+      std::cout << ", " << ( first1.inv ? "!" : "" ) << first1.index << "> ";
+      std::cout << elem.second << "\n";
+    }
+  }
+
+private:
+  /*! \brief Builds the structural library from the loaded gates.
+   *
+   * Gates are processed by increasing area. Every non-dominated gate with at
+   * least two inputs is DSD-decomposed, converted into an AIG rule, and
+   * expanded into derived rules. Each derived rule is indexed (which fills
+   * `_and_table`); for gates with at least `min_vars` inputs a supergate is
+   * stored in `_label_to_gate` under the label of the rule, keeping each list
+   * sorted by area, then number of inputs, then gate id.
+   * Gates whose decomposition needs a Shannon step are skipped.
+   *
+   *  \param min_vars minimum number of inputs a gate needs to be registered as supergate.
+   *  \param verbose print the intermediate rules and tables to standard output.
+   */
+  void generate_library( uint32_t min_vars, bool verbose )
+  {
+    /* select and load gates */
+    _supergates.reserve( _gates.size() );
+    generate_composed_gates();
+
+    /* mark dominated gates */
+    std::vector<bool> skip_gates( _supergates.size(), false );
+    select_dominated_gates( skip_gates );
+
+    std::vector<uint32_t> indexes( _supergates.size() );
+    std::iota( indexes.begin(), indexes.end(), 0 );
+    uint32_t max_label = 1;
+    uint32_t gate_pol = 0; // polarity of AND equivalent gate
+    uint32_t shift = 0;
+
+    /* sort cells by increasing order of area */
+    std::sort( indexes.begin(), indexes.end(),
+               [&]( auto const& a, auto const& b ) -> bool {
+                 return _supergates[a].area < _supergates[b].area;
+               } );
+
+    for ( uint32_t const ind : indexes )
+    {
+      composed_gate<NInputs> const& gate = _supergates[ind];
+
+      if ( gate.num_vars < 2 || skip_gates[ind] )
+        continue;
+
+      /* DSD decomposition */
+      rule rule = {};
+      std::vector<uint32_t> support = {};
+      for ( uint32_t i = 0; i < gate.num_vars; i++ )
+      {
+        rule.push_back( { node_type::pi_, i, {} } );
+        support.push_back( i );
+      }
+      auto cpy = gate.function;
+      /* compute_dsd sets this flag when it has to fall back to a Shannon decomposition */
+      gate_disjoint = false;
+      compute_dsd( cpy, support, rule );
+
+      /* ignore gates with reconvergence */
+      if ( gate_disjoint )
+        continue;
+
+      _dsd_map.insert( { gate.function, rule } );
+      if ( verbose )
+      {
+        std::cout << "Dsd:\n";
+        print_rule( rule, rule[rule.size() - 1] );
+      }
+
+      /* Aig conversion */
+      auto aig_rule = map_to_aig( rule );
+      if ( verbose )
+      {
+        std::cout << "\nAig:\n";
+        print_rule( aig_rule, aig_rule[aig_rule.size() - 1] );
+      }
+
+      /* Rules derivation */
+      std::vector<std::vector<dsd_node>> der_rules = {};
+      der_rules.push_back( aig_rule );
+      std::vector<std::tuple<uint32_t, uint32_t>> depths = { { get_depth( aig_rule, aig_rule[aig_rule[aig_rule.size() - 1].fanin[0].index] ), get_depth( aig_rule, aig_rule[aig_rule[aig_rule.size() - 1].fanin[1].index] ) } };
+      create_rules_from_dsd( der_rules, aig_rule, aig_rule[aig_rule.size() - 1], depths, true, true );
+      if ( verbose )
+      {
+        std::cout << "\nDerived:\n";
+      }
+
+      /* Indexing of rules and subrules, and_table construction, and gates' label assignment */
+      for ( auto elem : der_rules )
+      {
+        gate_pol = 0;
+        shift = 0;
+        std::vector<uint8_t> perm( gate.num_vars );
+        auto index_rule = do_indexing_rule( elem, elem[elem.size() - 1], max_label, gate_pol, perm, shift );
+
+        /* skip gate creation for small gates (<`min_vars` inputs) */
+        if ( gate.num_vars < min_vars )
+          continue;
+
+        supergate<NInputs> sg = { &gate,
+                                  static_cast<float>( gate.area ),
+                                  gate.tdelay,
+                                  perm,
+                                  gate_pol };
+
+        auto& v = _label_to_gate[index_rule.data];
+
+        /* keep the list ordered by (area, num_vars, id); the comparator must be a
+         * strict weak ordering, so a larger num_vars has to compare as "not less" */
+        auto it = std::lower_bound( v.begin(), v.end(), sg, [&]( auto const& s1, auto const& s2 ) {
+          if ( s1.area < s2.area )
+            return true;
+          if ( s1.area > s2.area )
+            return false;
+          if ( s1.root->num_vars < s2.root->num_vars )
+            return true;
+          if ( s1.root->num_vars > s2.root->num_vars )
+            return false;
+          return s1.root->id < s2.root->id;
+        } );
+
+        v.insert( it, sg );
+
+        if ( verbose )
+        {
+          print_rule( elem, elem[elem.size() - 1] );
+          std::cout << "\n";
+          /* dump the whole label table; bind by reference to avoid copying every list */
+          for ( auto const& entry : _label_to_gate )
+          {
+            std::cout << entry.first << "\n";
+            for ( auto const& stored : entry.second )
+            {
+              std::cout << ( stored.root )->root->expression << "\n";
+            }
+          }
+        }
+      }
+
+      if ( verbose )
+      {
+        std::cout << "\n";
+        std::cout << "And table:\n";
+        print_and_table();
+        std::cout << "\n";
+      }
+    }
+    if ( verbose )
+      std::cout << "\n";
+  }
+
+  /*! \brief Wraps the eligible library gates into composed gates.
+   *
+   * A gate is skipped when its name occurs more than once in `_gates`
+   * (multi-output gates) or when its function has more than `NInputs`
+   * variables. For every remaining gate the delay of a pin is the worst of
+   * its rise and fall block delays. Results are appended to `_supergates`.
+   */
+  void generate_composed_gates()
+  {
+    /* filter multi-output gates: count the entries sharing each name */
+    std::unordered_map<std::string, uint32_t> name_count;
+    name_count.reserve( _gates.size() );
+    for ( const auto& g : _gates )
+    {
+      ++name_count[g.name];
+    }
+
+    /* create composed gates */
+    uint32_t ignored = 0;
+    for ( const auto& g : _gates )
+    {
+      /* filter large gates and multi-output gates */
+      bool const too_large = g.function.num_vars() > NInputs;
+      if ( too_large || name_count[g.name] > 1 )
+      {
+        ++ignored;
+        continue;
+      }
+
+      /* use worst pin delay */
+      std::array<float, NInputs> pin_to_pin_delays{};
+      auto pin_index = 0u;
+      for ( auto const& pin : g.pins )
+      {
+        pin_to_pin_delays[pin_index] = std::max( pin.rise_block_delay, pin.fall_block_delay );
+        ++pin_index;
+      }
+
+      _supergates.emplace_back( composed_gate<NInputs>{ static_cast<unsigned int>( _supergates.size() ),
+                                                        false,
+                                                        &g,
+                                                        g.num_vars,
+                                                        g.function,
+                                                        g.area,
+                                                        pin_to_pin_delays,
+                                                        {} } );
+    }
+  }
+
+  /*! \brief Marks gates that are dominated by another gate with the same function.
+   *
+   * For every pair of gates (i, j) with identical truth tables:
+   * - j is marked when i has strictly smaller area and is not slower on any pin;
+   * - otherwise i is marked when its area is not smaller than the area of j
+   *   and j is not slower on any pin.
+   *
+   *  \param skip_gates one flag per entry of `_supergates`; dominated gates are set to true.
+   */
+  void select_dominated_gates( std::vector<bool>& skip_gates )
+  {
+    /* true if gate `a` is not slower than gate `b` on any of the first `num_pins` pins */
+    auto const never_slower = [&]( uint32_t a, uint32_t b, uint32_t num_pins ) -> bool {
+      for ( uint32_t k = 0; k < num_pins; ++k )
+      {
+        if ( _supergates[a].tdelay[k] > _supergates[b].tdelay[k] )
+          return false;
+      }
+      return true;
+    };
+
+    /* `i + 1 < size` rather than `i < size - 1`: the latter wraps around on an empty vector */
+    for ( uint32_t i = 0; i + 1 < skip_gates.size(); ++i )
+    {
+      if ( _supergates[i].root == nullptr )
+        continue;
+
+      if ( skip_gates[i] )
+        continue;
+
+      auto const& tti = _supergates[i].function;
+      for ( uint32_t j = i + 1; j < skip_gates.size(); ++j )
+      {
+        auto const& ttj = _supergates[j].function;
+
+        /* get the same functionality */
+        if ( tti != ttj )
+          continue;
+
+        /* is i smaller than j */
+        bool const smaller = _supergates[i].area < _supergates[j].area;
+
+        /* is i faster for every pin */
+        if ( smaller && never_slower( i, j, tti.num_vars() ) )
+        {
+          skip_gates[j] = true;
+          continue;
+        }
+
+        /* is j faster for every pin */
+        if ( !smaller && never_slower( j, i, tti.num_vars() ) )
+        {
+          /* i is dominated: no need to compare it with the remaining gates */
+          skip_gates[i] = true;
+          break;
+        }
+      }
+    }
+  }
+
+  /*! \brief Searches for a variable on which `tt` is top decomposable.
+   *
+   * Variables are tried in increasing order with `is_top_dec` (XOR disabled).
+   * Returns the first index that decomposes, or `num_vars` if none does.
+   */
+  uint32_t try_top_dec( kitty::dynamic_truth_table& tt, uint32_t num_vars )
+  {
+    for ( uint32_t var = 0; var < num_vars; ++var )
+    {
+      if ( is_top_dec( tt, var, false ).type != node_type::none )
+        return var;
+    }
+    return num_vars;
+  }
+
+  /*! \brief Applies the top decomposition of `tt` on variable `index`.
+   *
+   * `tt` is passed to `is_top_dec` both as input and as output, so it is
+   * overwritten with the cofactor selected by the decomposition.
+   * Returns the decomposition node with its first fanin remapped through
+   * `mapped_support`.
+   */
+  dsd_node do_top_dec( kitty::dynamic_truth_table& tt, uint32_t index, std::vector<uint32_t> mapped_support )
+  {
+    dsd_node top = is_top_dec( tt, index, false, &tt );
+    top.fanin[0].index = mapped_support[index];
+    return top;
+  }
+
+  /*! \brief Searches for a pair of variables on which `tt` is bottom decomposable.
+   *
+   * Pairs (i, j) with i < j are tried in lexicographic order with `is_bottom_dec`.
+   * Returns the first pair that decomposes; when there is none, the first
+   * element of the result equals `num_vars`.
+   */
+  std::tuple<int, int> try_bottom_dec( kitty::dynamic_truth_table& tt, uint32_t num_vars )
+  {
+    uint32_t i = 0;
+    /* initialized so the result is well defined even if the loops never execute */
+    uint32_t j = num_vars;
+    /* start from "no decomposition": the outer check may run before any inner iteration */
+    dsd_node res = { node_type::none, 0, {} };
+    for ( i = 0; i < num_vars; i++ )
+    {
+      for ( j = i + 1; j < num_vars; j++ )
+      {
+        res = is_bottom_dec( tt, i, j );
+        if ( res.type != node_type::none )
+          break;
+      }
+      if ( res.type != node_type::none )
+        break;
+    }
+    std::tuple<int, int> ret = { i, j };
+    return ret;
+  }
+
+  /*! \brief Applies the bottom decomposition of `tt` on variables `i` and `j`.
+   *
+   * `tt` is passed to `is_bottom_dec` both as input and as output.
+   * The two fanins of the returned node are remapped through `mapped_support`,
+   * and entry `i` of `mapped_support` is redirected to the new node.
+   */
+  dsd_node do_bottom_dec( kitty::dynamic_truth_table& tt, uint32_t i, uint32_t j, uint32_t new_index, std::vector<uint32_t>& mapped_support )
+  {
+    dsd_node bottom = is_bottom_dec( tt, i, j, &tt, new_index, false );
+
+    bottom.fanin[0].index = mapped_support[i];
+    bottom.fanin[1].index = mapped_support[j];
+
+    /* from now on, position i of the support refers to the node just created */
+    mapped_support[i] = bottom.index;
+    return bottom;
+  }
+
+  /*! \brief Applies a Shannon decomposition of `tt` on variable `index`.
+   *
+   * `co0` and `co1` are handed to `shannon_dec` as outputs.
+   * Returns the resulting node with its first fanin remapped through
+   * `mapped_support`.
+   */
+  dsd_node do_shannon_dec( kitty::dynamic_truth_table tt, uint32_t index, kitty::dynamic_truth_table& co0, kitty::dynamic_truth_table& co1, std::vector<uint32_t> mapped_support )
+  {
+    dsd_node shannon = shannon_dec( tt, index, &co0, &co1 );
+    shannon.fanin[0].index = mapped_support[index];
+    return shannon;
+  }
+
+  /*! \brief Removes the entry at position `index` from the support `v`.
+   *
+   * Entries after `index` move one position down. If `index` is past the
+   * end, the last entry is removed. An empty vector is left untouched.
+   */
+  void update_support( std::vector<uint32_t>& v, uint32_t index )
+  {
+    if ( v.empty() )
+      return;
+
+    /* clamp to the last position: an out-of-range index drops the last entry */
+    uint32_t const last = static_cast<uint32_t>( v.size() - 1 );
+    v.erase( v.begin() + ( index < last ? index : last ) );
+  }
+
+  /*! \brief Reduces `tt` to its minimum base and shrinks it into `tt_shr`.
+   *
+   * `tt` is modified in place by `kitty::min_base_inplace`; the result is then
+   * written into `tt_shr` by `kitty::shrink_to_inplace`.
+   */
+  template<class TT>
+  void min_base_shrink( TT& tt, TT& tt_shr )
+  {
+    kitty::min_base_inplace( tt );
+    kitty::shrink_to_inplace( tt_shr, tt );
+  }
+
+  /*! \brief Checks whether `rem` is the projection of a single variable.
+   *
+   * Returns the index of the first variable below `n_vars` whose projection
+   * equals `rem`, or `invalid_index` if there is none.
+   */
+  uint32_t is_PI( kitty::dynamic_truth_table const& rem, uint32_t n_vars )
+  {
+    for ( uint32_t v = 0; v < n_vars; ++v )
+    {
+      auto projection = rem.construct();
+      kitty::create_nth_var( projection, v );
+      if ( rem == projection )
+        return v;
+    }
+    return invalid_index;
+  }
+
+  /*! \brief Checks whether `rem` is the complement of a single variable.
+   *
+   * Returns the index of the first variable below `n_vars` whose complemented
+   * projection equals `rem`, or `invalid_index` if there is none.
+   */
+  uint32_t is_inv_PI( kitty::dynamic_truth_table const& rem, uint32_t n_vars )
+  {
+    for ( uint32_t v = 0; v < n_vars; ++v )
+    {
+      auto projection = rem.construct();
+      kitty::create_nth_var( projection, v );
+      if ( rem == ~projection )
+        return v;
+    }
+    return invalid_index;
+  }
+
+  /*! \brief Appends the rule already stored for `tt` to `rule`.
+   *
+   * The PI nodes of the stored rule are dropped. The remaining nodes are
+   * re-indexed so that they follow the nodes already in `rule`; fanins that
+   * pointed to a PI of the stored rule are redirected through
+   * `mapped_support`, all other fanins are shifted by the same offset as the
+   * node indices.
+   */
+  void update_found_rule( kitty::dynamic_truth_table& tt, std::vector<uint32_t>& mapped_support, std::vector<dsd_node>& rule )
+  {
+    auto const stored = get_rules( tt );
+
+    /* split the stored rule: count its PIs, keep everything else */
+    uint32_t stored_pis = 0;
+    std::vector<dsd_node> appended;
+    for ( auto const& n : stored )
+    {
+      if ( n.type == node_type::pi_ )
+        ++stored_pis;
+      else
+        appended.push_back( n );
+    }
+
+    /* number of nodes currently in the rule under construction */
+    uint32_t const current_size = static_cast<uint32_t>( rule.size() );
+
+    for ( auto& n : appended )
+    {
+      /* update index of node */
+      n.index = n.index + current_size - stored_pis;
+
+      /* update index of signal of fanins of nodes */
+      for ( auto& s : n.fanin )
+      {
+        if ( s.index >= stored_pis )
+          s = signal{ s.inv, s.index + current_size - stored_pis };
+        else
+          s = signal{ s.inv, mapped_support[s.index] };
+      }
+    }
+
+    rule.insert( rule.end(), appended.begin(), appended.end() );
+  }
+
+  /*! \brief Compute DSD decomposition for a boolean function recursively.
+   *
+   * The function first reuses a rule already stored for `tt`, then tries a top
+   * decomposition, then a bottom decomposition, and finally falls back to a
+   * Shannon decomposition. The Shannon fallback sets `gate_disjoint` to true.
+   *
+   *  \param tt dynamic truth table representing the function.
+   *  \param mapped_support vector indicating function's support at every recursive step.
+   *  \param rule DSD decomposition of the function.
+   * Returns index of dsd_node to add to rule.
+   */
+  uint32_t compute_dsd( kitty::dynamic_truth_table& tt, std::vector<uint32_t> mapped_support, std::vector<dsd_node>& rule )
+  {
+    /* Function has been already found */
+    if ( !get_rules( tt ).empty() )
+    {
+      update_found_rule( tt, mapped_support, rule );
+      return rule.size() - 1;
+    }
+    /* try top decomposition */
+    uint32_t i = try_top_dec( tt, tt.num_vars() );
+    if ( i < tt.num_vars() ) // it was top decomposable
+    {
+      /* do_top_dec passes tt as output as well, so tt now holds the remainder */
+      auto res = do_top_dec( tt, i, mapped_support );
+
+      update_support( mapped_support, i );
+
+      kitty::dynamic_truth_table tt_shr( tt.num_vars() - 1 );
+      min_base_shrink( tt, tt_shr );
+
+      if ( is_PI( tt_shr, tt_shr.num_vars() ) == invalid_index && is_inv_PI( tt_shr, tt_shr.num_vars() ) == invalid_index ) // check if remainder is PI
+      {
+        /* remainder is not a single literal: decompose it recursively */
+        res.fanin.push_back( { 0, compute_dsd( tt_shr, mapped_support, rule ) } );
+      }
+      else
+      {
+        /* remainder is a single literal: connect it directly, inverted if needed */
+        if ( is_PI( tt_shr, tt_shr.num_vars() ) != invalid_index )
+        {
+          res.fanin.push_back( { 0, mapped_support[is_PI( tt_shr, tt_shr.num_vars() )] } );
+        }
+        else
+        {
+          res.fanin.push_back( { 1, mapped_support[is_inv_PI( tt_shr, tt_shr.num_vars() )] } );
+        }
+      }
+      res.index = rule.size();
+      rule.push_back( res );
+
+      return res.index;
+    }
+    else /* try bottom decomposition */
+    {
+      auto couple = try_bottom_dec( tt, tt.num_vars() );
+      i = std::get<0>( couple );
+      uint32_t j = std::get<1>( couple );
+
+      if ( i < tt.num_vars() ) // it was bottom decomposable
+      {
+        /* the new node takes the next free index and replaces entry i of the support */
+        auto res = do_bottom_dec( tt, i, j, rule.size(), mapped_support );
+        rule.push_back( res );
+
+        update_support( mapped_support, j );
+
+        kitty::dynamic_truth_table tt_shr( tt.num_vars() - 1 );
+        min_base_shrink( tt, tt_shr );
+
+        return compute_dsd( tt_shr, mapped_support, rule );
+      }
+      else /* do shannon decomposition */
+      {
+        kitty::dynamic_truth_table co0( tt.num_vars() );
+        kitty::dynamic_truth_table co1( tt.num_vars() );
+        kitty::dynamic_truth_table co0_shr( tt.num_vars() - 1 );
+        kitty::dynamic_truth_table co1_shr( tt.num_vars() - 1 );
+
+        uint32_t index = find_unate_var( tt );
+
+        auto res = do_shannon_dec( tt, index, co0, co1, mapped_support );
+
+        /* check for reconvergence */
+        gate_disjoint = true;
+
+        /* NOTE(review): these indices are computed on the unshrunk cofactors but are used
+         * below to index mapped_support after update_support removed one entry; confirm
+         * that indices larger than `index` are still aligned */
+        uint32_t inv_var_co1 = is_inv_PI( co1, co1.num_vars() );
+        uint32_t var_co1 = is_PI( co1, co1.num_vars() );
+        uint32_t inv_var_co0 = is_inv_PI( co0, co0.num_vars() );
+        uint32_t var_co0 = is_PI( co0, co0.num_vars() );
+
+        update_support( mapped_support, index );
+
+        /* every cofactor is inserted at the front of the fanin list: co1 first, then co0 */
+        if ( inv_var_co1 == invalid_index && var_co1 == invalid_index ) // check if co1 is PI
+        {
+          min_base_shrink( co1, co1_shr );
+          res.fanin.insert( res.fanin.begin(), { 0, compute_dsd( co1_shr, mapped_support, rule ) } );
+        }
+        else
+        {
+          if ( inv_var_co1 != invalid_index )
+          {
+            uint32_t map_inv_var_co1 = mapped_support[inv_var_co1];
+            res.fanin.insert( res.fanin.begin(), { 1, map_inv_var_co1 } );
+          }
+          else
+          {
+            uint32_t map_var_co1 = mapped_support[var_co1];
+            res.fanin.insert( res.fanin.begin(), { 0, map_var_co1 } );
+          }
+        }
+
+        if ( inv_var_co0 == invalid_index && var_co0 == invalid_index ) // check if co0 is PI
+        {
+          min_base_shrink( co0, co0_shr );
+          res.fanin.insert( res.fanin.begin(), { 0, compute_dsd( co0_shr, mapped_support, rule ) } );
+        }
+        else
+        {
+          if ( inv_var_co0 != invalid_index )
+          {
+            uint32_t map_inv_var_co0 = mapped_support[inv_var_co0];
+            res.fanin.insert( res.fanin.begin(), { 1, map_inv_var_co0 } );
+          }
+          else
+          {
+            uint32_t map_var_co0 = mapped_support[var_co0];
+            res.fanin.insert( res.fanin.begin(), { 0, map_var_co0 } );
+          }
+        }
+
+        res.index = rule.size();
+        rule.push_back( res );
+
+        return res.index;
+      }
+    }
+  }
+
+  /*! \brief Returns a copy of the rule stored for `tt` in `_dsd_map`.
+   *
+   * The result is empty when no rule has been stored for that function.
+   */
+  rule get_rules( kitty::dynamic_truth_table const& tt )
+  {
+    auto const entry = _dsd_map.find( tt );
+    if ( entry == _dsd_map.end() )
+      return {};
+    return entry->second;
+  }
+
+  /*! \brief Finds the first node of `rule` that has `node` as first or second fanin.
+   *
+   * Nodes of type `pi_` and `zero_` are never considered.
+   * Returns a pointer into `rule`, or `nullptr` if no such node exists.
+   */
+  dsd_node* get_father( rule& rule, dsd_node& node )
+  {
+    for ( auto& candidate : rule )
+    {
+      if ( candidate.type == node_type::pi_ || candidate.type == node_type::zero_ )
+        continue;
+      if ( candidate.fanin[0].index == node.index || candidate.fanin[1].index == node.index )
+        return &candidate;
+    }
+    return nullptr;
+  }
+
+  /*! \brief Finds the first node of `r` whose index equals `i`.
+   *
+   * Returns a pointer into `r`, or `nullptr` if no node has that index.
+   */
+  dsd_node* find_node( rule& r, uint32_t i )
+  {
+    auto const it = std::find_if( r.begin(), r.end(), [i]( dsd_node const& n ) {
+      return n.index == i;
+    } );
+    return ( it == r.end() ) ? nullptr : &( *it );
+  }
+
+  /*! \brief Convert rule derived from DSD decomposition into aig format.
+   *
+   * All indices are shifted by one and a `zero_` node is placed at index 0.
+   * The nodes are then visited from the last to the first: `and_`, `pi_`, and
+   * `zero_` nodes are copied, while `or_`, `mux_`, and `xor_` nodes are
+   * replaced by `and_` nodes with inverted edges. When such a node is the
+   * root, an additional `and_` root with node 0 as first fanin is appended.
+   *
+   *  \param r rule to convert.
+   * Returns rule converted into aig format.
+   */
+  rule map_to_aig( rule& r )
+  {
+    std::vector<dsd_node> rule( r );
+    std::vector<dsd_node> aig_rule;
+
+    /* make room for the zero_ node: shift node and fanin indices by one */
+    std::transform( rule.begin(), rule.end(), rule.begin(), []( dsd_node n ) -> dsd_node {
+      for ( auto& s : n.fanin )
+      {
+        s.index += 1;
+      }
+      return { n.type, n.index + 1, n.fanin };
+    } );
+
+    rule.insert( rule.begin(), { node_type::zero_, 0, {} } );
+
+    /* visit in reverse order; every converted node is inserted at the front of aig_rule */
+    for ( typename std::vector<dsd_node>::reverse_iterator i = rule.rbegin(); i != rule.rend(); ++i )
+    {
+      dsd_node n = *i;
+      dsd_node new_node;
+
+      if ( n.type == node_type::and_ || n.type == node_type::pi_ || n.type == node_type::zero_ )
+      {
+        new_node = n;
+      }
+      else if ( n.type == node_type::or_ )
+      {
+        /* OR becomes an AND with both fanins inverted; the output inversion goes to the father */
+        new_node = { node_type::and_, n.index, { { ~n.fanin[0].inv, n.fanin[0].index }, { ~n.fanin[1].inv, n.fanin[1].index } } };
+        if ( get_father( rule, n ) != nullptr )
+        {
+          /* flip the polarity of the father's edge that points to this node */
+          dsd_node* father = find_node( aig_rule, get_father( rule, n )->index );
+          if ( father->fanin[0].index == n.index )
+          {
+            father->fanin[0].inv = ~father->fanin[0].inv;
+          }
+          else
+          {
+            father->fanin[1].inv = ~father->fanin[1].inv;
+          }
+        }
+        else // it is root
+        {
+          /* no father to absorb the inversion: add a root with inverted node 0 and inverted n */
+          dsd_node new_root = { node_type::and_, static_cast<uint32_t>( rule.size() ), { { 1, 0 }, { 1, n.index } } };
+          aig_rule.insert( aig_rule.begin(), new_root );
+        }
+      }
+      else if ( n.type == node_type::mux_ )
+      {
+        if ( get_father( rule, n ) != nullptr )
+        {
+          /* flip the polarity of the father's edge that points to this node */
+          dsd_node* father = find_node( aig_rule, get_father( rule, n )->index );
+          if ( father->fanin[0].index == n.index )
+          {
+            father->fanin[0].inv = ~father->fanin[0].inv;
+          }
+          else
+          {
+            father->fanin[1].inv = ~father->fanin[1].inv;
+          }
+          /* three ANDs replace the MUX: fanin[2] gates fanin[1], its complement gates fanin[0] */
+          dsd_node node_or = { node_type::and_, n.index + 2, { { 1, n.index }, { 1, n.index + 1 } } };
+          dsd_node node_and1 = { node_type::and_, n.index + 1, { { 0, n.fanin[2].index }, { n.fanin[1].inv, n.fanin[1].index } } };
+          new_node = { node_type::and_, n.index, { { 1, n.fanin[2].index }, { n.fanin[0].inv, n.fanin[0].index } } }; // and0_node
+
+          // node already in aig_rule must have index and fanin index update (index += 2, fanin_index -> (>= n.index -> +2; nothing))
+          for ( auto& elem : aig_rule )
+          {
+            elem.index += 2;
+            for ( auto& s : elem.fanin )
+            {
+              if ( s.index >= n.index )
+                s.index += 2;
+            }
+          }
+
+          aig_rule.insert( aig_rule.begin(), node_or );
+          aig_rule.insert( aig_rule.begin(), node_and1 );
+        }
+        else // it is root
+        {
+          /* root MUX: the three ANDs take indices size-1, size, size+1; the extra root gets size+2 */
+          dsd_node new_root = { node_type::and_, static_cast<uint32_t>( rule.size() + 2 ), { { 1, 0 }, { 1, static_cast<uint32_t>( rule.size() ) + 1 } } };
+          dsd_node node_or = { node_type::and_, static_cast<uint32_t>( rule.size() + 1 ), { { 1, static_cast<uint32_t>( rule.size() - 1 ) }, { 1, static_cast<uint32_t>( rule.size() ) } } };
+          dsd_node node_and1 = { node_type::and_, static_cast<uint32_t>( rule.size() ), { { 0, n.fanin[2].index }, { n.fanin[1].inv, n.fanin[1].index } } };
+          new_node = { node_type::and_, static_cast<uint32_t>( rule.size() - 1 ), { { 1, n.fanin[2].index }, { n.fanin[0].inv, n.fanin[0].index } } }; // and0_node
+
+          aig_rule.insert( aig_rule.begin(), new_root );
+          aig_rule.insert( aig_rule.begin(), node_or );
+          aig_rule.insert( aig_rule.begin(), node_and1 );
+        }
+      }
+      else if ( n.type == node_type::xor_ )
+      {
+        if ( get_father( rule, n ) != nullptr )
+        {
+          /* flip the polarity of the father's edge that points to this node */
+          dsd_node* father = find_node( aig_rule, get_father( rule, n )->index );
+          if ( father->fanin[0].index == n.index )
+          {
+            father->fanin[0].inv = ~father->fanin[0].inv;
+          }
+          else
+          {
+            father->fanin[1].inv = ~father->fanin[1].inv;
+          }
+          /* three ANDs replace the XOR: (fanin0 AND NOT fanin1), (NOT fanin0 AND fanin1), and their combination */
+          dsd_node node_or = { node_type::and_, n.index + 2, { { 1, n.index }, { 1, n.index + 1 } } };
+          dsd_node node_and1 = { node_type::and_, n.index + 1, { { 0, n.fanin[0].index }, { 1, n.fanin[1].index } } };
+          new_node = { node_type::and_, n.index, { { 1, n.fanin[0].index }, { 0, n.fanin[1].index } } }; // and0_node
+
+          // node already in aig_rule must have index and fanin index update (index += 2, fanin_index -> (>= n.index -> +2; nothing))
+          for ( auto& elem : aig_rule )
+          {
+            elem.index += 2;
+            for ( auto& s : elem.fanin )
+            {
+              if ( s.index >= n.index )
+                s.index += 2;
+            }
+          }
+
+          aig_rule.insert( aig_rule.begin(), node_or );
+          aig_rule.insert( aig_rule.begin(), node_and1 );
+        }
+        else // it is root
+        {
+          /* root XOR: same index layout as the root MUX case */
+          dsd_node new_root = { node_type::and_, static_cast<uint32_t>( rule.size() + 2 ), { { 1, 0 }, { 1, static_cast<uint32_t>( rule.size() + 1 ) } } };
+          dsd_node node_or = { node_type::and_, static_cast<uint32_t>( rule.size() + 1 ), { { 1, static_cast<uint32_t>( rule.size() - 1 ) }, { 1, static_cast<uint32_t>( rule.size() ) } } };
+          dsd_node node_and1 = { node_type::and_, static_cast<uint32_t>( rule.size() ), { { 0, n.fanin[0].index }, { 1, n.fanin[1].index } } };
+          new_node = { node_type::and_, static_cast<uint32_t>( rule.size() - 1 ), { { 1, n.fanin[0].index }, { 0, n.fanin[1].index } } }; // and0_node
+
+          aig_rule.insert( aig_rule.begin(), new_root );
+          aig_rule.insert( aig_rule.begin(), node_or );
+          aig_rule.insert( aig_rule.begin(), node_and1 );
+        }
+      }
+      /* the converted node always ends up before the nodes created for it above */
+      aig_rule.insert( aig_rule.begin(), new_node );
+    }
+    return aig_rule;
+  }
+
+  /*! \brief Exchanges two nodes of `rule`.
+   *
+   * The `index` fields of the two nodes are exchanged first, then the
+   * elements stored at the two original index positions are swapped.
+   */
+  void swap( rule& rule, dsd_node* node_i, dsd_node* node_j )
+  {
+    auto const pos_i = node_i->index;
+    auto const pos_j = node_j->index;
+
+    node_i->index = pos_j;
+    node_j->index = pos_i;
+
+    std::swap( rule[pos_i], rule[pos_j] );
+  }
+
+  /* makes left or right move to derive a new rule:
+   * `target` and `r` exchange their positions in `rule`; afterwards the node
+   * stored at the former position of `r` gets the other node as non-inverted
+   * fanin `left`, and that other node receives the fanin that was replaced */
+  void make_move( rule& rule, dsd_node* target, dsd_node* r, uint8_t left )
+  {
+    int const child_slot = left;
+    int const root_slot = 1 - child_slot;
+
+    /* fanin of target that is about to be overwritten */
+    auto const moved_index = target->fanin[child_slot].index;
+    auto const moved_inv = target->fanin[child_slot].inv;
+
+    // swap position internal to rule of the two elements
+    swap( rule, target, r );
+    /* after the swap the two pointers refer to the exchanged contents */
+    dsd_node* const upper = r;
+    dsd_node* const lower = target;
+
+    // adjust children
+    upper->fanin[child_slot].index = lower->index;
+    upper->fanin[child_slot].inv = 0;
+    lower->fanin[root_slot].index = moved_index;
+    lower->fanin[root_slot].inv = moved_inv;
+  }
+
+  /* checks whether a rule with the same left depth or right depth has already been encountered:
+   * returns true when the depth pair that the requested move would produce (in either order)
+   * is not contained in `depth_branches` */
+  bool check_depths( rule rule, dsd_node root, std::vector<std::tuple<uint32_t, uint32_t>> depth_branches, uint32_t left ) // for left = 1 check if left move is possible
+  {
+    using depth_pair = std::tuple<uint32_t, uint32_t>;
+
+    auto left_node = rule[root.fanin[0].index];
+    auto right_node = rule[root.fanin[1].index];
+    auto const depth_l = get_depth( rule, left_node );
+    auto const depth_r = get_depth( rule, right_node );
+
+    auto const seen = [&]( depth_pair const& candidate ) -> bool {
+      return std::find( depth_branches.begin(), depth_branches.end(), candidate ) != depth_branches.end();
+    };
+
+    if ( left )
+    {
+      /* a left move shortens the left branch and lengthens the right one */
+      return !seen( depth_pair{ depth_l - 1, depth_r + 1 } ) && !seen( depth_pair{ depth_r + 1, depth_l - 1 } );
+    }
+    /* a right move does the opposite */
+    return !seen( depth_pair{ depth_l + 1, depth_r - 1 } ) && !seen( depth_pair{ depth_r - 1, depth_l + 1 } );
+  }
+
+  /*! \brief Recursively create new rules from the given one.
+   *         For every dsd node of the original rule, left and right moves are tried.
+   *         Then, the same algorithm is applied to all derived rules.
+   *         The algorithm stops if no new acceptable rules can be found.
+   *         A new rule is acceptable if no other rule with the same left and right depths has been found.
+   *  \param new_rules vector of derived rules.
+   *  \param rule original rule.
+   *  \param start_node node of rule on which we try the right and left moves.
+   *  \param depth_branches vector of encountered left and right depths.
+   *  \param can_left specifies if we can perform a left move.
+   *  \param can_right specifies if we can perform a right move.
+   */
+  void create_rules_from_dsd( std::vector<rule>& new_rules, rule rule, dsd_node start_node, std::vector<std::tuple<uint32_t, uint32_t>>& depth_branches, bool can_left, bool can_right )
+  {
+    if ( start_node.type == node_type::pi_ || start_node.type == node_type::zero_ ) // if you cannot produce new rules or you are a PI return
+      return;
+
+    /* each move works on its own copy, so the original rule stays available for the recursion */
+    std::vector<dsd_node> left_rule( rule );
+    std::vector<dsd_node> right_rule( rule );
+    /* fresh depth history for the recursion into the children of start_node */
+    std::vector<std::tuple<uint32_t, uint32_t>> next_depths = {};
+    auto left_node = &left_rule[start_node.fanin[0].index];
+    auto right_node = &right_rule[start_node.fanin[1].index];
+    bool new_left = false;
+    bool new_right = false;
+
+    /* record the depths of the current rule before trying any move */
+    std::tuple<uint32_t, uint32_t> depths = { get_depth( rule, *left_node ), get_depth( rule, *right_node ) };
+    depth_branches.push_back( depths );
+
+    /* left move */
+    /* requires a non-inverted left child of the same type and a depth pair not seen before */
+    if ( can_left && left_node->type == start_node.type && start_node.fanin[0].inv == 0 && check_depths( rule, start_node, depth_branches, 1 ) )
+    {
+      auto r = &left_rule[start_node.index];
+
+      make_move( left_rule, left_node, r, 1 );
+
+      new_rules.push_back( left_rule );
+      new_left = true;
+      depth_branches.push_back( { get_depth( left_rule, left_rule[left_rule[left_node->index].fanin[0].index] ), get_depth( left_rule, left_rule[left_rule[left_node->index].fanin[1].index] ) } );
+    }
+    /* right move */
+    /* same conditions as the left move, applied to the right child */
+    if ( can_right && right_node->type == start_node.type && start_node.fanin[1].inv == 0 && check_depths( rule, start_node, depth_branches, 0 ) )
+    {
+      auto r = &right_rule[start_node.index];
+
+      make_move( right_rule, right_node, r, 0 );
+
+      new_rules.push_back( right_rule );
+      new_right = true;
+      depth_branches.push_back( { get_depth( right_rule, right_rule[right_rule[right_node->index].fanin[0].index] ), get_depth( right_rule, right_rule[right_rule[right_node->index].fanin[1].index] ) } );
+    }
+
+    /* initial rule, start_node left children */
+    create_rules_from_dsd( new_rules, rule, rule[start_node.fanin[0].index], next_depths, true, true );
+    /* initial rule, start_node right children */
+    create_rules_from_dsd( new_rules, rule, rule[start_node.fanin[1].index], next_depths, true, true );
+    /* left rule, start_node new root */
+    /* only further left moves are allowed on a rule obtained by a left move */
+    if ( new_left )
+    {
+      create_rules_from_dsd( new_rules, left_rule, left_rule[start_node.index], depth_branches, true, false );
+    }
+    /* right rule, start_node new root */
+    /* only further right moves are allowed on a rule obtained by a right move */
+    if ( new_right )
+    {
+      create_rules_from_dsd( new_rules, right_rule, right_rule[start_node.index], depth_branches, false, true );
+    }
+  }
+
+  /*! \brief Reorders the polarity bits after the two branches of a node are swapped.
+   *
+   * The polarity word is split into three consecutive bit ranges:
+   * `obs_pi` bits that are kept in place, `left_pi` bits of the left branch,
+   * and `right_pi` bits of the right branch. The left and right ranges
+   * exchange their positions in the result.
+   *
+   *  \param polarity polarity bits before the swap.
+   *  \param left_pi number of bits belonging to the left branch.
+   *  \param right_pi number of bits belonging to the right branch.
+   *  \param obs_pi number of low-order bits that are left untouched.
+   */
+  uint32_t compute_canonized_polarity( uint32_t polarity, uint32_t left_pi, uint32_t right_pi, uint32_t obs_pi )
+  {
+    /* mask covering bit positions [from, to); shifting an unsigned one keeps bit 31 well defined */
+    auto const range_mask = []( uint32_t from, uint32_t to ) -> uint32_t {
+      uint32_t mask = 0;
+      for ( uint32_t bit = from; bit < to; ++bit )
+      {
+        mask |= ( uint32_t{ 1 } << bit );
+      }
+      return mask;
+    };
+
+    uint32_t const mask_obs = range_mask( 0, obs_pi );
+    uint32_t const mask_l = range_mask( obs_pi, obs_pi + left_pi );
+    uint32_t const mask_r = range_mask( obs_pi + left_pi, obs_pi + left_pi + right_pi );
+
+    return ( ( polarity & mask_l ) << right_pi ) | ( ( polarity & mask_r ) >> left_pi ) | ( polarity & mask_obs );
+  }
+
+  /*! \brief Reorders the permutation after the two branches of a node are swapped.
+   *
+   * The first `obs_pi` entries are kept. The next `right_pi` entries are taken
+   * from `left_pi` positions further in the old permutation; every entry from
+   * position `obs_pi + right_pi` to the end is taken from `right_pi`
+   * positions earlier in the old permutation.
+   */
+  void compute_canonized_permutation( std::vector<uint8_t>& perm, uint32_t left_pi, uint32_t right_pi, uint32_t obs_pi )
+  {
+    std::vector previous( perm );
+    uint32_t const right_end = obs_pi + right_pi;
+
+    /* entries of the right branch move to the front */
+    for ( uint32_t pos = obs_pi; pos < right_end; ++pos )
+    {
+      perm[pos] = previous[pos + left_pi];
+    }
+    /* everything that follows is shifted up by the size of the right branch */
+    for ( uint32_t pos = right_end; pos < perm.size(); ++pos )
+    {
+      perm[pos] = previous[pos - right_pi];
+    }
+  }
+
+  /*! \brief Recursively assigns indexes to a rule and its subrules and builds and_table.
+   *         It also computes negations and permutations for the gate whose rule is being passed as parameter.
+   *
+   *  \param r rule to index.
+   *  \param n dsd node to start from.
+   *  \param max max index assigned.
+   *  \param polarity polarity of gate.
+   *  \param perm permutation of gate.
+   *  \param shift specifies the number of PIs encountered.
+   * Returns label to be assigned to gate whose rule is r.
+   */
+  label do_indexing_rule( rule r, dsd_node n, uint32_t& max, uint32_t& polarity, std::vector<uint8_t>& perm, uint32_t& shift )
+  {
+    /* a PI always gets label index 1 and records its (zero-based) variable in perm */
+    if ( n.type == node_type::pi_ )
+    {
+      perm[shift] = n.index - 1;
+      return { 0, 1 };
+    }
+    /* the zero_ node gets label index 0 */
+    if ( n.type == node_type::zero_ )
+      return { 0, 0 };
+
+    /* PIs already encountered before this node; they are not affected by a swap below */
+    uint32_t obs_pi = shift;
+
+    /* do indexing on the left */
+    uint32_t left_index = do_indexing_rule( r, r[n.fanin[0].index], max, polarity, perm, shift ).index;
+    if ( r[n.fanin[0].index].type == node_type::pi_ )
+    {
+      /* the inversion of a PI edge is stored in the polarity of the gate */
+      polarity |= ( n.fanin[0].inv << shift );
+      shift++;
+    }
+    /* encountered PIs on the left */
+    uint32_t left_pi = shift - obs_pi;
+
+    /* do indexing on the right */
+    uint32_t right_index = do_indexing_rule( r, r[n.fanin[1].index], max, polarity, perm, shift ).index;
+    if ( r[n.fanin[1].index].type == node_type::pi_ )
+    {
+      polarity |= ( n.fanin[1].inv << shift );
+      shift++;
+    }
+    /* encountered PIs on the right */
+    uint32_t right_pi = shift - obs_pi - left_pi;
+
+    /* check if it is inverted gate */
+    /* i.e. the last node of the rule with node 0 as first fanin and an inverted second fanin */
+    if ( n.fanin[0].index == 0 && n.fanin[1].inv && n.index == r.size() - 1 )
+    {
+      return { 1, right_index };
+    }
+    signal left, right;
+
+    /* ignore inversion of PIs */
+    if ( r[n.fanin[0].index].type == node_type::pi_ )
+      left.inv = 0;
+    else
+      left.inv = (uint64_t)n.fanin[0].inv;
+    left.index = left_index;
+    if ( r[n.fanin[1].index].type == node_type::pi_ )
+      right.inv = 0;
+    else
+      right.inv = (uint64_t)n.fanin[1].inv;
+    right.index = right_index;
+
+    std::tuple<signal, signal> t;
+
+    /* canonize and_table on left index being smaller than right one */
+    if ( left.index <= right.index )
+      t = std::make_tuple( left, right );
+    else
+    {
+      /* the branches are swapped, so polarity and permutation must be swapped accordingly */
+      /* new polarity */
+      polarity = compute_canonized_polarity( polarity, left_pi, right_pi, obs_pi );
+
+      /* new permutation */
+      compute_canonized_permutation( perm, left_pi, right_pi, obs_pi );
+
+      t = std::make_tuple( right, left );
+    }
+    /* reuse the label of an already known pair, otherwise allocate the next one */
+    auto match = _and_table.find( t );
+    if ( match != _and_table.end() )
+      return { 0, match->second };
+    max++;
+    /* insert new value in and_table */
+    _and_table.insert( { t, max } );
+    return { 0, max };
+  }
+
+  /*! \brief Tests whether `tt` is top decomposable on variable `var_index`.
+   *
+   * The checks are tried in this order: AND with the variable, OR with the
+   * variable, AND with the complemented variable, OR with the complemented
+   * variable, and (only if `allow_xor` is set) XOR with the variable.
+   *
+   *  \param tt truth table to test.
+   *  \param var_index variable to test.
+   *  \param allow_xor enables the XOR check.
+   *  \param func if not null, receives the remainder function of the decomposition found.
+   * Returns a node whose type is the decomposition found and whose single fanin carries the
+   * polarity of the variable (its index is `UINT32_MAX`); the type is `none` if nothing applies.
+   */
+  template<class TT>
+  dsd_node is_top_dec( const TT& tt, uint32_t var_index, bool allow_xor = false, TT* func = nullptr )
+  {
+    static_assert( kitty::is_complete_truth_table<TT>::value, "Can only be applied on complete truth tables." );
+
+    auto var = tt.construct();
+    kitty::create_nth_var( var, var_index );
+
+    /* stores the requested cofactor in `func` (if any) and builds the one-fanin result node */
+    auto const decomposed = [&]( node_type type, signal var_signal, bool positive_cofactor ) -> dsd_node {
+      if ( func )
+      {
+        *func = positive_cofactor ? kitty::cofactor1( tt, var_index ) : kitty::cofactor0( tt, var_index );
+      }
+      dsd_node res = { type, var_index, {} };
+      res.fanin.push_back( var_signal );
+      return res;
+    };
+
+    /* tt = var AND rest */
+    if ( kitty::implies( tt, var ) )
+      return decomposed( node_type::and_, { 0, UINT32_MAX }, true );
+
+    /* tt = var OR rest */
+    if ( kitty::implies( var, tt ) )
+      return decomposed( node_type::or_, { 0, UINT32_MAX }, false );
+
+    /* tt = !var AND rest */
+    if ( kitty::implies( tt, ~var ) )
+      return decomposed( node_type::and_, { 1, UINT32_MAX }, false );
+
+    /* tt = !var OR rest */
+    if ( kitty::implies( ~var, tt ) )
+      return decomposed( node_type::or_, { 1, UINT32_MAX }, true );
+
+    if ( allow_xor )
+    {
+      /* try XOR */
+      const auto negative = kitty::cofactor0( tt, var_index );
+      const auto positive = kitty::cofactor1( tt, var_index );
+
+      if ( kitty::equal( negative, ~positive ) )
+      {
+        if ( func )
+        {
+          *func = negative;
+        }
+        dsd_node res = { node_type::xor_, var_index, {} };
+        res.fanin.push_back( { 0, UINT32_MAX } );
+        return res;
+      }
+    }
+
+    return { node_type::none, var_index, {} };
+  }
+
+  template<class TT>
+  dsd_node is_bottom_dec( const TT& tt, uint32_t var_index1, uint32_t var_index2, TT* func = nullptr, uint32_t new_index = invalid_index, bool allow_xor = false )
+  {
+    static_assert( kitty::is_complete_truth_table<TT>::value, "Can only be applied on complete truth tables." );
+    /* Bottom-decomposition test: can var_index1 and var_index2 be merged into one two-input gate feeding tt? */
+    const auto tt0 = kitty::cofactor0( tt, var_index1 );
+    const auto tt1 = kitty::cofactor1( tt, var_index1 );
+    /* ttXY below: var_index1 fixed to X, var_index2 fixed to Y; eqAB compares cofactors A and B numbered 0..3 = 00, 01, 10, 11 */
+    const auto tt00 = kitty::cofactor0( tt0, var_index2 );
+    const auto tt01 = kitty::cofactor1( tt0, var_index2 );
+    const auto tt10 = kitty::cofactor0( tt1, var_index2 );
+    const auto tt11 = kitty::cofactor1( tt1, var_index2 );
+
+    const auto eq01 = kitty::equal( tt00, tt01 );
+    const auto eq02 = kitty::equal( tt00, tt10 );
+    const auto eq03 = kitty::equal( tt00, tt11 );
+    const auto eq12 = kitty::equal( tt01, tt10 );
+    const auto eq13 = kitty::equal( tt01, tt11 );
+    const auto eq23 = kitty::equal( tt10, tt11 );
+
+    const auto num_pairs =
+        static_cast<uint32_t>( eq01 ) +
+        static_cast<uint32_t>( eq02 ) +
+        static_cast<uint32_t>( eq03 ) +
+        static_cast<uint32_t>( eq12 ) +
+        static_cast<uint32_t>( eq13 ) +
+        static_cast<uint32_t>( eq23 );
+
+    if ( num_pairs != 2u && num_pairs != 3 ) /* only 2 or 3 equal cofactor pairs are accepted */
+    {
+      return { node_type::none, invalid_index, {} };
+    }
+    /* on success: node index is new_index, fanins are the two variables with their inversion flag, *func is rebuilt on var_index1 */
+    if ( !eq01 && !eq02 && !eq03 ) // 00 is different
+    {
+      if ( func )
+      {
+        *func = kitty::mux_var( var_index1, tt11, tt00 );
+      }
+      dsd_node res = { node_type::or_, new_index, {} };
+      res.fanin.push_back( { 0, var_index1 } );
+      res.fanin.push_back( { 0, var_index2 } );
+      return res;
+    }
+    else if ( !eq01 && !eq12 && !eq13 ) // 01 is different
+    {
+      if ( func )
+      {
+        *func = kitty::mux_var( var_index1, tt01, tt10 );
+      }
+      dsd_node res = { node_type::and_, new_index, {} };
+      res.fanin.push_back( { 1, var_index1 } );
+      res.fanin.push_back( { 0, var_index2 } );
+      return res;
+    }
+    else if ( !eq02 && !eq12 && !eq23 ) // 10 is different
+    {
+      if ( func )
+      {
+        *func = kitty::mux_var( var_index1, tt01, tt10 );
+      }
+      dsd_node res = { node_type::or_, new_index, {} };
+      res.fanin.push_back( { 1, var_index1 } );
+      res.fanin.push_back( { 0, var_index2 } );
+      return res;
+    }
+    else if ( !eq03 && !eq13 && !eq23 ) // 11 is different
+    {
+      if ( func )
+      {
+        *func = kitty::mux_var( var_index1, tt11, tt00 );
+      }
+      dsd_node res = { node_type::and_, new_index, {} };
+      res.fanin.push_back( { 0, var_index1 } );
+      res.fanin.push_back( { 0, var_index2 } );
+      return res;
+    }
+    else if ( allow_xor ) // XOR
+    {
+      if ( func )
+      {
+        *func = kitty::mux_var( var_index1, tt01, tt00 );
+      }
+      dsd_node res = { node_type::xor_, new_index, {} };
+      res.fanin.push_back( { 0, var_index1 } );
+      res.fanin.push_back( { 0, var_index2 } );
+      return res;
+    }
+
+    return { node_type::none, invalid_index, {} }; /* reached when none of the four cases matched and allow_xor is false */
+  }
+
+  template<class TT>
+  uint32_t find_unate_var( const TT& tt )
+  {
+    /* Returns the first index whose two cofactors are equal, else num_vars - 1; the last two variables are not scanned. */
+    for ( uint32_t index = 0; index + 2 < tt.num_vars(); ++index ) /* `index + 2 <` cannot wrap around for fewer than 2 variables */
+    {
+      const auto tt0 = kitty::cofactor0( tt, index );
+      const auto tt1 = kitty::cofactor1( tt, index );
+      if ( ( ( tt0 & tt1 ) == tt0 ) && ( ( tt0 & tt1 ) == tt1 ) ) /* both hold only when tt0 == tt1 */
+        return index;
+    }
+    return tt.num_vars() - 1;
+  }
+
+  template<class TT>
+  dsd_node shannon_dec( const TT& tt, uint32_t index, TT* func0 = nullptr, TT* func1 = nullptr )
+  {
+    static_assert( kitty::is_complete_truth_table<TT>::value, "Can only be applied on complete truth tables." );
+
+    /* Shannon expansion around `index`: a mux_ node whose only recorded fanin is the selector variable */
+    dsd_node mux_node = { node_type::mux_, index, {} };
+    mux_node.fanin.push_back( { 0, index } );
+
+    if ( func0 != nullptr && func1 != nullptr )
+    {
+      /* both cofactors are taken before any store, and only handed back when both outputs are requested */
+      const auto negative = kitty::cofactor0( tt, index );
+      const auto positive = kitty::cofactor1( tt, index );
+      *func0 = negative;
+      *func1 = positive;
+    }
+    return mux_node;
+  }
+
+  /*! \brief Get depth of rule starting from a specific dsd_node.
+   *
+   *  \param rule rule (taken by reference so that the recursion does not copy it)
+   *  \param n dsd_node to start from
+   * Returns depth of rule starting from n; PI and constant-zero nodes have depth 0.
+   */
+  uint32_t get_depth( rule const& rule, dsd_node const& n )
+  {
+    if ( n.type == node_type::pi_ || n.type == node_type::zero_ )
+    {
+      return 0;
+    }
+    /* only the first two fanins are followed; any further fanin does not contribute to the depth */
+    uint32_t const left_depth = get_depth( rule, rule[n.fanin[0].index] );
+    uint32_t const right_depth = get_depth( rule, rule[n.fanin[1].index] );
+    uint32_t const max_depth = ( left_depth > right_depth ) ? left_depth : right_depth;
+    return max_depth + 1;
+  }
+
+#pragma region Report
+  std::string to_string( node_type t )
+  {
+    /* Symbol used when printing a node; mux_ shares "+" with or_ since a mux is printed as a sum of two products. */
+    if ( t == node_type::and_ )
+      return "*";
+    if ( t == node_type::or_ || t == node_type::mux_ )
+      return "+";
+    if ( t == node_type::xor_ )
+      return "xor";
+    if ( t == node_type::pi_ )
+      return "pi";
+    if ( t == node_type::none )
+      return "none";
+    if ( t == node_type::zero_ )
+      return "zero";
+    return "unknown"; /* every path must return: flowing off the end of a non-void function is undefined behavior */
+  }
+
+  void print_dsd_node( dsd_node& n ) /* one output line: index, operator symbol, then each fanin as {index, inv} */
+  {
+    std::cout << n.index << " " << to_string( n.type ) << " ";
+    for ( auto const& fanin_signal : n.fanin )
+      std::cout << "{" << fanin_signal.index << ", " << fanin_signal.inv << "}";
+    std::cout << "\n";
+  }
+
+  void print_rule( rule& r ) /* dumps every node of the rule, in storage order, through print_dsd_node */
+  {
+    for ( auto& node : r )
+      print_dsd_node( node );
+  }
+
+  /*! \brief Print expression of a rule.
+   *
+   *  \param rule rule (taken by reference so that the recursion does not copy it).
+   *  \param n dsd_node to start from.
+   *  PIs print as letters and the constant as "0"; a mux_ prints as (!s * f0 + s * f1), s being fanin[2].
+   */
+  void print_rule( rule const& rule, dsd_node const& n )
+  {
+    if ( n.type == node_type::pi_ )
+    {
+      std::cout << char( 'a' + n.index );
+      return;
+    }
+    if ( n.type == node_type::zero_ )
+    {
+      std::cout << "0";
+      return;
+    }
+    bool const is_mux = ( n.type == node_type::mux_ );
+    std::cout << "(";
+    if ( is_mux )
+    {
+      std::cout << "!" << char( 'a' + n.fanin[2].index ) << " * ";
+    }
+    /* the inversion marker is printed right before its operand (after the select literal of a mux), as on the right side */
+    if ( n.fanin[0].inv )
+    {
+      std::cout << "!";
+    }
+    print_rule( rule, rule[n.fanin[0].index] );
+    std::cout << " " << to_string( n.type ) << " ";
+    if ( is_mux )
+    {
+      std::cout << char( 'a' + n.fanin[2].index ) << " * ";
+    }
+    if ( n.fanin[1].inv )
+    {
+      std::cout << "!";
+    }
+    print_rule( rule, rule[n.fanin[1].index] );
+    std::cout << ")";
+  }
+#pragma endregion
+
+private:
+  bool gate_disjoint{ false }; /* flag for gate support*/
+
+  std::vector<gate> const& _gates; /* collection of gates */
+  composed_list_t _supergates;     /* list of composed_gates */
+  lib_rule _dsd_map;               /* hash map for DSD decomposition of gates */
+  lib_table _and_table;            /* AND table */
+  map_label_gate _label_to_gate;   /* map label to gate */
+};
+
+} // namespace mockturtle
diff --git a/include/mockturtle/utils/super_utils.hpp b/include/mockturtle/utils/super_utils.hpp
index 59298eef2..def2e5729 100644
--- a/include/mockturtle/utils/super_utils.hpp
+++ b/include/mockturtle/utils/super_utils.hpp
@@ -37,67 +37,52 @@
 #include <unordered_map>
 #include <vector>
 
+#include <fmt/format.h>
 #include <kitty/constructors.hpp>
 #include <kitty/dynamic_truth_table.hpp>
 
 #include "../io/genlib_reader.hpp"
 #include "../io/super_reader.hpp"
+#include "include/supergate.hpp"
 
 namespace mockturtle
 {
 
 struct super_utils_params
 {
+  /*! \brief load multi-output gates in simple supergates */
+  bool load_multioutput_in_single{ false };
+
   /*! \brief reports loaded supergates */
   bool verbose{ false };
 };
 
-template<unsigned NInputs>
-struct composed_gate
-{
-  /* unique ID */
-  uint32_t id;
-
-  /* gate is a supergate */
-  bool is_super{ false };
-
-  /* pointer to the root library gate */
-  gate const* root{ nullptr };
-
-  /* support of the composed gate */
-  uint32_t num_vars{ 0 };
-
-  /* function */
-  kitty::dynamic_truth_table function;
-
-  /* area */
-  double area{ 0.0 };
-
-  /* pin-to-pin delays */
-  std::array<float, NInputs> tdelay{};
-
-  /* fanin gates */
-  std::vector<composed_gate<NInputs>*> fanin{};
-};
-
 /*! \brief Utilities to generate supergates
  *
  * This class creates supergates starting from supergates
  * specifications contained in `supergates_spec` extracted
  * from a SUPER file.
  *
+ * Multi-output gates are also extracted from the list of
+ * GENLIB gates. However multi-output gates are currently not
+ * supported as supergates members.
+ *
  * This utility is called by `tech_library` to construct
  * the library for technology mapping.
  */
-template<unsigned NInputs = 5u>
+template<unsigned NInputs = 6u>
 class super_utils
 {
+private:
+  static constexpr uint32_t truth_table_size = 6;
+
 public:
   explicit super_utils( std::vector<gate> const& gates, super_lib const& supergates_spec = {}, super_utils_params const ps = {} )
       : _gates( gates ),
         _supergates_spec( supergates_spec ),
         _ps( ps ),
-        _supergates()
+        _supergates(),
+        _multioutput_gates()
   {
     if ( _supergates_spec.supergates.size() == 0 )
     {
@@ -129,20 +114,58 @@ class super_utils
     return simple_gates_size;
   }
 
+  /*! \brief Get multi-output gates.
+   *
+   * Returns `_multioutput_gates`: one inner list per gate name shared by several GENLIB gates, one entry per such gate.
+   */
+  const std::vector<std::vector<composed_gate<NInputs>>>& get_multioutput_library() const
+  {
+    return _multioutput_gates;
+  }
+
 public:
   void generate_library_with_genlib()
   {
     uint32_t initial_size = _supergates.size();
 
+    std::unordered_map<std::string, uint32_t> multioutput_map;
+    std::unordered_map<std::string, uint32_t> multioutput_idx;
+    multioutput_map.reserve( _gates.size() );
+
+    /* look for multi-output gates (gates with the same name) */
+    uint32_t multioutput_i = 0;
+    for ( const auto& g : _gates )
+    {
+      if ( multioutput_map.find( g.name ) != multioutput_map.end() )
+      {
+        /* assign an index */
+        if ( multioutput_map[g.name] == 1 )
+          multioutput_idx[g.name] = multioutput_i++;
+
+        multioutput_map[g.name] += 1;
+      }
+      else
+      {
+        multioutput_map[g.name] = 1;
+        multioutput_idx[g.name] = UINT32_MAX;
+      }
+    }
+
+    /* create composed gates */
+    uint32_t ignored = 0;
+    uint32_t ignored_id = 0;
     for ( const auto& g : _gates )
     {
       std::array<float, NInputs> pin_to_pin_delays{};
 
       if ( g.function.num_vars() > NInputs )
       {
-        std::cerr << "[i] WARNING: gate " << g.name << " IGNORED, too many variables for the library settings" << std::endl;
+        ++ignored;
+        ignored_id = g.id;
         continue;
       }
+      if ( g.function.num_vars() > truth_table_size )
+        continue;
 
       auto i = 0u;
       for ( auto const& pin : g.pins )
@@ -151,27 +174,53 @@ class super_utils
         pin_to_pin_delays[i++] = std::max( pin.rise_block_delay, pin.fall_block_delay );
       }
 
-      _supergates.emplace_back( composed_gate<NInputs>{ static_cast<unsigned int>( _supergates.size() ),
-                                                        false,
-                                                        &g,
-                                                        g.num_vars,
-                                                        g.function,
-                                                        g.area,
-                                                        pin_to_pin_delays,
-                                                        {} } );
+      if ( multioutput_map[g.name] == 1 || _ps.load_multioutput_in_single )
+      {
+        _supergates.emplace_back( composed_gate<NInputs>{ static_cast<unsigned int>( _supergates.size() ),
+                                                          false,
+                                                          &g,
+                                                          g.num_vars,
+                                                          g.function,
+                                                          g.area,
+                                                          pin_to_pin_delays,
+                                                          {} } );
+      }
+
+      if ( multioutput_map[g.name] > 1 )
+      {
+        uint32_t idx = multioutput_idx[g.name];
+        if ( _multioutput_gates.size() <= idx )
+          _multioutput_gates.emplace_back( std::vector<composed_gate<NInputs>>() );
+
+        _multioutput_gates[multioutput_idx[g.name]].emplace_back(
+            composed_gate<NInputs>{ static_cast<unsigned int>( idx ),
+                                    false,
+                                    &g,
+                                    g.num_vars,
+                                    g.function,
+                                    g.area,
+                                    pin_to_pin_delays,
+                                    {} } );
+      }
     }
 
     simple_gates_size = _supergates.size() - initial_size;
 
     if ( _ps.verbose )
     {
-      std::cout << fmt::format( "[i] Loaded {} simple gates in the library\n", simple_gates_size );
+      std::cout << fmt::format( "[i] Loading {} simple gates in the library\n", simple_gates_size );
+      std::cout << fmt::format( "[i] Loading {} multi-output gates in the library\n", _multioutput_gates.size() );
+    }
+
+    if ( ignored > 0 )
+    {
+      std::cerr << fmt::format( "[i] WARNING: {} gates IGNORED (e.g., {}), too many inputs for the library settings\n", ignored, _gates[ignored_id].name );
     }
   }
 
   void generate_library_with_super()
   {
-    if ( _supergates_spec.max_num_vars > NInputs )
+    if ( _supergates_spec.max_num_vars > NInputs || _supergates_spec.max_num_vars > truth_table_size )
     {
       std::cerr << fmt::format(
           "ERROR: NInputs ({}) should be greater or equal than the max number of variables ({}) in the super file.\n", NInputs, _supergates_spec.max_num_vars );
@@ -187,7 +236,7 @@ class super_utils
     {
       if ( gates_map.find( g.name ) != gates_map.end() )
       {
-        std::cerr << fmt::format( "WARNING: ignoring genlib gate {}, duplicated name entry.", g.name ) << std::endl;
+        std::cerr << fmt::format( "WARNING: ignoring genlib gate {}, duplicated name entry in supergates.", g.name ) << std::endl;
       }
       else
       {
@@ -271,7 +320,7 @@ class super_utils
 
       /* force at `is_super = false` simple gates considered as supergates.
        * This is necessary to not have duplicates since tech_library
-       * computes independently the permutations for simple gates.
+       * computes independently the permutations for simple gates.
        * Moreover simple gates permutations could be incomplete in SUPER
        * libraries which are constrained by the number of gates. */
       bool is_super_verified = g.is_super;
@@ -407,6 +456,7 @@ class super_utils
   super_lib const& _supergates_spec;
   super_utils_params const _ps;
   std::deque<composed_gate<NInputs>> _supergates;
+  std::vector<std::vector<composed_gate<NInputs>>> _multioutput_gates;
 }; /* class super_utils */
 
 } /* namespace mockturtle */
diff --git a/include/mockturtle/utils/tech_library.hpp b/include/mockturtle/utils/tech_library.hpp
index 148a476e5..cd647b53c 100644
--- a/include/mockturtle/utils/tech_library.hpp
+++ b/include/mockturtle/utils/tech_library.hpp
@@ -1,5 +1,5 @@
 /* mockturtle: C++ logic network library
- * Copyright (C) 2018-2022  EPFL
+ * Copyright (C) 2018-2023  EPFL
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -34,13 +34,16 @@
 
 #pragma once
 
+#include <array>
 #include <cassert>
 #include <unordered_map>
 #include <vector>
 
 #include <kitty/constructors.hpp>
 #include <kitty/dynamic_truth_table.hpp>
+#include <kitty/hash.hpp>
 #include <kitty/npn.hpp>
+#include <kitty/operators.hpp>
 #include <kitty/print.hpp>
 #include <kitty/static_truth_table.hpp>
 
@@ -48,6 +51,9 @@
 
 #include "../io/genlib_reader.hpp"
 #include "../io/super_reader.hpp"
+#include "include/supergate.hpp"
+#include "standard_cell.hpp"
+#include "struct_library.hpp"
 #include "super_utils.hpp"
 
 namespace mockturtle
@@ -81,14 +87,34 @@ std::string const mcnc_library = "GATE   inv1    1  O=!a;             PIN * INV
 
 enum class classification_type : uint32_t
 {
-  /* generate the NP configurations (n! * 2^n) */
+  /*! \brief generate the NP configurations (n! * 2^n)
+   *  Direct matching: best up to ~200 library gates */
   np_configurations = 0,
-  /* generate the P configurations (n!) and N-canonization */
+
+  /*! \brief generate the P configurations (n!)
+   *  Matching by N-canonization: best for more
+   * than ~200 library gates */
   p_configurations = 1,
+
+  /*! \brief generate the n configurations (2^n)
+   *  Direct fast matching, less quality */
+  n_configurations = 2,
 };
 
 struct tech_library_params
 {
+  /*! \brief Load large gates with more than 6 inputs */
+  bool load_large_gates{ true };
+
+  /*! \brief Loads multioutput gates in the library */
+  bool load_multioutput_gates{ true };
+
+  /*! \brief Remove dominated gates (larger sizes) */
+  bool remove_dominated_gates{ true };
+
+  /*! \brief Loads multioutput gates in single-output library */
+  bool load_multioutput_gates_single{ false };
+
   /*! \brief reports np enumerations */
   bool verbose{ false };
 
@@ -96,25 +122,25 @@ struct tech_library_params
   bool very_verbose{ false };
 };
 
-template<unsigned NInputs>
-struct supergate
+namespace detail
 {
-  /* pointer to the root gate */
-  composed_gate<NInputs> const* root{};
-
-  /* area */
-  float area{ 0.0 };
 
-  /* pin-to-pin delay */
-  std::array<float, NInputs> tdelay{};
+template<uint32_t NumVars, uint32_t NumOutputs>
+struct tuple_tt_hash
+{
+  inline std::size_t operator()( std::array<kitty::static_truth_table<NumVars, true>, NumOutputs> const& tts ) const
+  {
+    std::size_t seed = kitty::hash_block( tts[0]._bits );
 
-  /* np permutation vector */
-  std::vector<uint8_t> permutation{};
+    for ( auto i = 1; i < NumOutputs; ++i )
+      kitty::hash_combine( seed, kitty::hash_block( tts[i]._bits ) );
 
-  /* pin negations */
-  uint8_t polarity{ 0 };
+    return seed;
+  }
 };
 
+} // namespace detail
+
 /*! \brief Library of gates for Boolean matching
  *
  * This class creates a technology library from a set
@@ -146,34 +172,72 @@ struct supergate
       mockturtle::tech_library lib_super( gates, supergates_spec );
    \endverbatim
  */
-template<unsigned NInputs = 4u, classification_type Configuration = classification_type::np_configurations>
+template<unsigned NInputs = 6u, classification_type Configuration = classification_type::np_configurations>
 class tech_library
 {
+private:
+  static constexpr float epsilon = 0.0005;
+  static constexpr uint32_t max_multi_outputs = 2;
+  static constexpr uint32_t truth_table_size = 6;
   using supergates_list_t = std::vector<supergate<NInputs>>;
-  using tt_hash = kitty::hash<kitty::static_truth_table<NInputs>>;
-  using lib_t = phmap::flat_hash_map<kitty::static_truth_table<NInputs>, supergates_list_t, tt_hash>;
+  using TT = kitty::static_truth_table<truth_table_size>;
+  using tt_hash = kitty::hash<TT>;
+  using multi_tt_hash = detail::tuple_tt_hash<truth_table_size, max_multi_outputs>;
+  using index_t = phmap::flat_hash_map<TT, uint32_t, tt_hash>;
+  using lib_t = phmap::flat_hash_map<TT, supergates_list_t, tt_hash>;
+  using multi_relation_t = std::array<TT, max_multi_outputs>;
+  using multi_supergates_list_t = std::array<std::vector<supergate<NInputs>>, max_multi_outputs>;
+  using multi_lib_t = phmap::flat_hash_map<multi_relation_t, multi_supergates_list_t, multi_tt_hash>;
+  using multi_func_t = phmap::flat_hash_map<uint64_t, uint64_t>;
+  using struct_lib_t = phmap::flat_hash_map<uint32_t, supergates_list_t>;
 
 public:
   explicit tech_library( std::vector<gate> const& gates, tech_library_params const ps = {}, super_lib const& supergates_spec = {} )
       : _gates( gates ),
         _supergates_spec( supergates_spec ),
         _ps( ps ),
-        _super( _gates, _supergates_spec ),
+        _super( _gates, _supergates_spec, super_utils_params{ ps.load_multioutput_gates_single, ps.verbose } ),
         _use_supergates( false ),
-        _super_lib()
+        _struct( _gates ),
+        _super_lib(),
+        _multi_lib(),
+        _struct_lib()
   {
+    static_assert( NInputs < 16, "The technology library database supports NInputs up to 15\n" );
+
     generate_library();
+
+    if ( ps.load_multioutput_gates )
+      generate_multioutput_library();
+
+    if ( ps.load_large_gates )
+    {
+      _struct.construct( 2, _ps.very_verbose );
+    }
   }
 
   explicit tech_library( std::vector<gate> const& gates, super_lib const& supergates_spec, tech_library_params const ps = {} )
       : _gates( gates ),
         _supergates_spec( supergates_spec ),
         _ps( ps ),
-        _super( _gates, _supergates_spec, super_utils_params{ ps.verbose } ),
+        _super( _gates, _supergates_spec, super_utils_params{ ps.load_multioutput_gates_single, ps.verbose } ),
         _use_supergates( true ),
-        _super_lib()
+        _struct( _gates ),
+        _super_lib(),
+        _multi_lib(),
+        _struct_lib()
   {
+    static_assert( NInputs < 16, "The technology library database supports NInputs up to 15\n" );
+
     generate_library();
+
+    if ( ps.load_multioutput_gates )
+      generate_multioutput_library();
+
+    if ( ps.load_large_gates )
+    {
+      _struct.construct( 2, _ps.very_verbose );
+    }
   }
 
   /*! \brief Get the gates matching the function.
@@ -181,7 +245,7 @@ class tech_library
    * Returns a list of gates that match the function represented
    * by the truth table.
    */
-  const supergates_list_t* get_supergates( kitty::static_truth_table<NInputs> const& tt ) const
+  const supergates_list_t* get_supergates( TT const& tt ) const
   {
     auto match = _super_lib.find( tt );
     if ( match != _super_lib.end() )
@@ -189,6 +253,51 @@ class tech_library
     return nullptr;
   }
 
+  /*! \brief Look up the multi-output gates implementing a tuple of functions.
+   *
+   * Returns a pointer to the per-output supergate lists stored for `tts`,
+   * or `nullptr` when the library has no entry for these truth tables.
+   */
+  const multi_supergates_list_t* get_multi_supergates( std::array<TT, max_multi_outputs> const& tts ) const
+  {
+    auto const entry = _multi_lib.find( tts );
+    if ( entry == _multi_lib.end() )
+      return nullptr;
+    return &entry->second;
+  }
+
+  /*! \brief Look up the function ID of one output of a multi-output gate.
+   *
+   * `tt` is the single-output function packed in 64 bits (hence up to 6 inputs).
+   * Returns the ID stored in `_multi_funcs`, or zero when there is no match.
+   */
+  uint64_t get_multi_function_id( uint64_t const& tt ) const
+  {
+    auto const entry = _multi_funcs.find( tt );
+    if ( entry == _multi_funcs.end() )
+      return 0;
+    return entry->second;
+  }
+
+  /*! \brief Get the pattern ID for structural matching.
+   *
+   * Given the IDs of the two children, forwards to the structural library and returns its
+   * pattern ID (documented as UINT32_MAX when not found). This function works with only AND operators.
+   */
+  uint32_t get_pattern_id( uint32_t id1, uint32_t id2 ) const
+  {
+    return _struct.get_pattern_id( id1, id2 );
+  }
+
+  /*! \brief Get the gates matching the pattern ID and phase.
+   *
+   * Forwards to the structural library. NOTE(review): presumably nullptr when nothing matches, as in get_supergates -- confirm.
+   */
+  const supergates_list_t* get_supergates_pattern( uint32_t id, bool phase ) const
+  {
+    return _struct.get_supergates_pattern( id, phase );
+  }
+
   /*! \brief Get inverter information.
    *
    * Returns area, delay, and ID of the smallest inverter.
@@ -219,6 +328,34 @@ class tech_library
     return _gates;
   }
 
+  /*! \brief Returns the standard cells built from the library gates by `get_standard_cells` (returned by value). */
+  const std::vector<standard_cell> get_cells() const
+  {
+    return get_standard_cells( _gates );
+  }
+
+  /*! \brief Returns the multi-output gates collected by the supergate utility (`_super.get_multioutput_library()`). */
+  const std::vector<std::vector<composed_gate<NInputs>>>& get_multioutput_gates() const
+  {
+    return _super.get_multioutput_library();
+  }
+
+  /*! \brief Returns the number of entries in the multi-output library
+   *  (zero when multi-output gates were not loaded).
+   */
+  const uint32_t num_multioutput_gates() const
+  {
+    return _ps.load_multioutput_gates ? _multi_lib.size() : 0;
+  }
+
+  /*! \brief Returns the size of the structural-matching library; zero unless
+   *  large gates are loaded and NInputs exceeds the truth-table size.
+   */
+  const uint32_t num_structural_gates() const
+  {
+    return ( _ps.load_large_gates && NInputs > truth_table_size ) ? _struct.get_struct_library().size() : 0;
+  }
+
 private:
   void generate_library()
   {
@@ -234,7 +371,7 @@ class tech_library
         if ( kitty::is_const0( kitty::cofactor1( gate.function, 0 ) ) )
         {
           /* get the smallest area inverter */
-          if ( !inv || gate.area < _inv_area )
+          if ( !inv || gate.area < _inv_area - epsilon )
           {
             _inv_area = gate.area;
             _inv_delay = compute_worst_delay( gate );
@@ -245,7 +382,7 @@ class tech_library
         else
         {
           /* get the smallest area buffer */
-          if ( !buf || gate.area < _buf_area )
+          if ( !buf || gate.area < _buf_area - epsilon )
           {
             _buf_area = gate.area;
             _buf_delay = compute_worst_delay( gate );
@@ -259,12 +396,27 @@ class tech_library
     auto const& supergates = _super.get_super_library();
     uint32_t const standard_gate_size = _super.get_standard_library_size();
 
+    std::vector<bool> skip_gates( supergates.size(), false );
+
+    if ( _ps.remove_dominated_gates )
+    {
+      select_dominated_gates( supergates, skip_gates );
+    }
+
     /* generate the configurations for the standard gates */
     uint32_t i = 0u;
+    uint32_t skip_count = 0;
     for ( auto const& gate : supergates )
     {
       uint32_t np_count = 0;
 
+      if ( skip_gates[skip_count++] )
+      {
+        /* exclude gate */
+        ++i;
+        continue;
+      }
+
       if ( gate.root == nullptr )
       {
         /* exclude PIs */
@@ -288,7 +440,7 @@ class tech_library
             sg.polarity |= ( ( neg >> perm[i] ) & 1 ) << i; /* permutate input negation to match the right pin */
           }
 
-          const auto static_tt = kitty::extend_to<NInputs>( tt );
+          const auto static_tt = kitty::extend_to<truth_table_size>( tt );
 
           auto& v = _super_lib[static_tt];
 
@@ -342,14 +494,14 @@ class tech_library
                                       static_cast<float>( gate.area ),
                                       {},
                                       perm,
-                                      static_cast<uint8_t>( phase ) };
+                                      static_cast<uint16_t>( phase ) };
 
             for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
             {
               sg.tdelay[i] = gate.tdelay[perm[i]];
             }
 
-            const auto static_tt = kitty::extend_to<NInputs>( tt_canon );
+            const auto static_tt = kitty::extend_to<truth_table_size>( tt_canon );
 
             auto& v = _super_lib[static_tt];
 
@@ -400,6 +552,14 @@ class tech_library
           const auto tt = gate.function;
           kitty::exact_np_enumeration( tt, on_np );
         }
+        else if ( Configuration == classification_type::n_configurations )
+        {
+          /* N enumeration of the function */
+          const auto tt = gate.function;
+          std::vector<uint8_t> pin_order( tt.num_vars() );
+          std::iota( pin_order.begin(), pin_order.end(), 0 );
+          kitty::exact_n_enumeration( tt, [&]( auto const& tt, auto neg ) { on_np( tt, neg, pin_order ); } );
+        }
         else
         {
           /* P enumeration followed by N canonization of the function */
@@ -424,14 +584,14 @@ class tech_library
                                     static_cast<float>( gate.area ),
                                     {},
                                     perm,
-                                    static_cast<uint8_t>( neg ) };
+                                    static_cast<uint16_t>( neg ) };
 
           for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
           {
             sg.tdelay[i] = gate.tdelay[perm[i]];
           }
 
-          const auto static_tt = kitty::extend_to<NInputs>( tt );
+          const auto static_tt = kitty::extend_to<truth_table_size>( tt );
 
           auto& v = _super_lib[static_tt];
 
@@ -486,14 +646,14 @@ class tech_library
                                       static_cast<float>( gate.area ),
                                       {},
                                       perm,
-                                      static_cast<uint8_t>( phase ) };
+                                      static_cast<uint16_t>( phase ) };
 
             for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
             {
               sg.tdelay[i] = gate.tdelay[perm[i]];
             }
 
-            const auto static_tt = kitty::extend_to<NInputs>( tt_canon );
+            const auto static_tt = kitty::extend_to<truth_table_size>( tt_canon );
 
             auto& v = _super_lib[static_tt];
 
@@ -552,7 +712,7 @@ class tech_library
         }
       }
 
-      if ( _ps.verbose )
+      if ( _ps.very_verbose )
       {
         std::cout << "Gate " << gate.root->name << ", num_vars = " << gate.num_vars << ", np entries = " << np_count << std::endl;
       }
@@ -583,6 +743,231 @@ class tech_library
     }
   }
 
+  /* Enumerates the NP configurations of the multi-output gates into _multi_lib (supports only NP configurations) */
+  void generate_multioutput_library()
+  {
+    uint32_t np_count = 0;
+    std::string ignored_name;
+
+    /* load multi-output gates */
+    auto const& multioutput_gates = _super.get_multioutput_library();
+
+    uint32_t ignored_gates = 0;
+    for ( auto const& multi_gate : multioutput_gates )
+    {
+      /* skip the gates with more than max_multi_outputs outputs; one name is kept for the warning */
+      if ( multi_gate.size() > max_multi_outputs )
+      {
+        ignored_name = multi_gate[0].root->name;
+        ++ignored_gates;
+        continue;
+      }
+
+      std::array<size_t, max_multi_outputs> order = { 0 };
+
+      const auto on_np = [&]( auto const& tts, auto neg, auto const& perm ) {
+        std::vector<supergate<NInputs>> multi_sg;
+
+        for ( auto const& gate : multi_gate )
+        {
+          multi_sg.emplace_back( supergate<NInputs>{ &gate,
+                                                     static_cast<float>( gate.area ),
+                                                     {},
+                                                     perm,
+                                                     0 } );
+        }
+
+        for ( auto i = 0u; i < perm.size() && i < NInputs; ++i )
+        {
+          uint32_t j = 0;
+          for ( auto& sg : multi_sg )
+          {
+            sg.tdelay[i] = multi_gate[j++].tdelay[perm[i]];
+            sg.polarity |= ( ( neg >> perm[i] ) & 1 ) << i; /* permute input negation to match the right pin */
+          }
+        }
+
+        std::array<TT, max_multi_outputs> static_tts = {};
+        std::array<TT, max_multi_outputs> sorted_tts = {};
+
+        /* canonize output: complement the functions whose bit 0 is set and record it in the polarity */
+        for ( auto i = 0; i < tts.size(); ++i )
+        {
+          static_tts[i] = kitty::extend_to<truth_table_size>( tts[i] );
+          if ( ( static_tts[i]._bits & 1 ) == 1 )
+          {
+            static_tts[i] = ~static_tts[i];
+            multi_sg[i].polarity |= 1 << NInputs; /* set flipped output polarity*/
+          }
+        }
+
+        std::iota( order.begin(), order.end(), 0 ); /* order[i] becomes the original index of the i-th sorted output */
+
+        std::sort( order.begin(), order.end(), [&]( size_t a, size_t b ) {
+          return static_tts[a] < static_tts[b];
+        } );
+
+        std::transform( order.begin(), order.end(), sorted_tts.begin(), [&]( size_t a ) {
+          return static_tts[a];
+        } );
+
+        // std::sort( static_tts.begin(), static_tts.end() );
+
+        auto& v = _multi_lib[sorted_tts];
+
+        /* ordered insert by ascending area and number of input pins, ties broken by gate id */
+        auto it = std::lower_bound( v[0].begin(), v[0].end(), multi_sg[0], [&]( auto const& s1, auto const& s2 ) {
+          if ( s1.area < s2.area )
+            return true;
+          if ( s1.area > s2.area )
+            return false;
+          if ( s1.root->num_vars < s2.root->num_vars )
+            return true;
+          if ( s1.root->num_vars > s2.root->num_vars )
+            return false; /* more pins is never "less": required for a strict weak ordering */
+          return s1.root->id < s2.root->id;
+        } );
+
+        bool to_add = true;
+        /* search for duplicated elements due to symmetries */
+        while ( it != v[0].end() )
+        {
+          /* if different gate, exit */
+          if ( multi_sg[0].root->id != it->root->id )
+            break;
+
+          /* different polarity: not a duplicate, check the next entry */
+          if ( multi_sg[order[0]].polarity != it->polarity )
+          {
+            ++it;
+            continue;
+          }
+
+          bool same_delay = true;
+          size_t d = std::distance( v[0].begin(), it );
+          for ( auto i = 0; i < multi_sg.size(); ++i )
+          {
+            if ( multi_sg[order[i]].tdelay != v[i][d].tdelay )
+            {
+              same_delay = false;
+              break;
+            }
+          }
+
+          /* do not add if equivalent to another in the library */
+          if ( same_delay )
+          {
+            to_add = false;
+            break;
+          }
+
+          ++it;
+        }
+
+        if ( to_add )
+        {
+          size_t d = std::distance( v[0].begin(), it );
+          for ( auto i = 0; i < multi_sg.size(); ++i )
+          {
+            v[i].insert( v[i].begin() + d, multi_sg[order[i]] ); /* same position d in every per-output list */
+          }
+          ++np_count;
+        }
+      };
+
+      /* NP enumeration of the function */
+      std::vector<kitty::dynamic_truth_table> tts;
+      for ( auto const& gate : multi_gate )
+        tts.push_back( gate.function );
+      kitty::exact_multi_np_enumeration( tts, on_np );
+
+      /* NPN enumeration of the single outputs */
+      for ( auto const& gate : multi_gate )
+      {
+        exact_npn_enumeration( gate.function, [&]( auto const& tt, auto neg, auto const& perm ) {
+          (void)neg;
+          (void)perm;
+          _multi_funcs[tt._bits[0]] = gate.function._bits[0];
+        } );
+      }
+    }
+
+    /* update area based on the single output contribution */
+    multi_update_area();
+
+    if ( _ps.verbose && ignored_gates > 0 )
+    {
+      std::cerr << fmt::format( "[i] WARNING: {} multi-output gates IGNORED (e.g., {}), too many outputs for the library settings\n", ignored_gates, ignored_name );
+    }
+
+    // std::cout << _multi_lib.size() << "\n";
+  }
+
+  void multi_update_area()
+  {
+    /* split the area of each multi-output gate in _multi_lib among its outputs, weighted by the area of the best single-output gate implementing each output */
+    for ( auto& pair : _multi_lib )
+    {
+      auto& multi_gates = pair.second;
+      for ( auto i = 0; i < multi_gates[0].size(); ++i )
+      {
+        /* get sum of area and area count */
+        double area = 0;
+        uint32_t contribution_count = 0;
+        std::array<double, max_multi_outputs> area_contribution = { 0 };
+        for ( auto j = 0; j < max_multi_outputs; ++j )
+        {
+          auto& gate = multi_gates[j][i];
+          const TT tt = kitty::extend_to<truth_table_size>( gate.root->function );
+
+          /* get the area of the smallest match with a simple gate */
+          const auto match = get_supergates( tt );
+          if ( match == nullptr ) /* no single-output gate for this function: its contribution stays 0 */
+            continue;
+
+          area_contribution[j] = ( *match )[0].area;
+          area += area_contribution[j];
+          ++contribution_count;
+
+          // std::cout << fmt::format( "Contribution {}\t = {}\n", ( *match )[0].root->root->name, area_contribution[j] );
+        }
+
+        /* compute scaling factor and remaining area for non-matched gates */
+        double scaling_factor = 1.0;
+        double remaining_area = 0;
+
+        if ( contribution_count != max_multi_outputs ) /* at least one output has no single-output match */
+        {
+          scaling_factor = 0.9;
+
+          if ( area > multi_gates[0][i].area )
+            scaling_factor -= ( area - multi_gates[0][i].area ) / area;
+
+          remaining_area = ( multi_gates[0][i].area - area * scaling_factor );
+          area = area * scaling_factor + remaining_area; /* i.e., area becomes multi_gates[0][i].area */
+          remaining_area /= ( max_multi_outputs - contribution_count ); /* shared evenly by the non-matched outputs */
+        }
+
+        /* assign weighted contribution */
+        // double area_old = multi_gates[0][i].area;
+        // double area_check = 0;
+        for ( auto j = 0; j < max_multi_outputs; ++j )
+        {
+          auto& gate = multi_gates[j][i];
+
+          if ( area_contribution[j] > 0 )
+            gate.area = scaling_factor * area_contribution[j] * gate.area / area;
+          else
+            gate.area = remaining_area;
+
+          // area_check += gate.area;
+        }
+
+        // std::cout << fmt::format( "Area before: {}\t Area after {}\n", area_old, area_check );
+      }
+    }
+  }
+
   float compute_worst_delay( gate const& g )
   {
     float worst_delay = 0.0f;
@@ -596,6 +981,59 @@ class tech_library
     return worst_delay;
   }
 
+  void select_dominated_gates( std::deque<composed_gate<NInputs>> const& supergates, std::vector<bool>& skip_gates )
+  { /* sets skip_gates[x] for every supergate x dominated in area and pin delays by another supergate with the same function */
+    for ( uint32_t i = 0; i + 1 < skip_gates.size(); ++i ) /* not size() - 1: it wraps around on an empty vector */
+    {
+      if ( supergates[i].root == nullptr )
+        continue;
+
+      if ( skip_gates[i] )
+        continue;
+
+      auto const& tti = supergates[i].function;
+      for ( uint32_t j = i + 1; j < skip_gates.size(); ++j )
+      {
+        auto const& ttj = supergates[j].function;
+
+        /* get the same functionality */
+        if ( tti != ttj )
+          continue;
+
+        /* is i smaller than j */
+        bool smaller = supergates[i].area < supergates[j].area;
+
+        /* is i faster for every pin */
+        bool faster = true;
+        for ( uint32_t k = 0; k < tti.num_vars(); ++k )
+        {
+          if ( supergates[i].tdelay[k] > supergates[j].tdelay[k] )
+            faster = false;
+        }
+
+        if ( smaller && faster ) /* i dominates j */
+        {
+          skip_gates[j] = true;
+          continue;
+        }
+
+        /* is j faster for every pin */
+        faster = true;
+        for ( uint32_t k = 0; k < tti.num_vars(); ++k )
+        {
+          if ( supergates[j].tdelay[k] > supergates[i].tdelay[k] )
+            faster = false;
+        }
+
+        if ( !smaller && faster ) /* j dominates i: no need to compare i any further */
+        {
+          skip_gates[i] = true;
+          break;
+        }
+      }
+    }
+  }
+
 private:
   /* inverter info */
   float _inv_area{ 0.0 };
@@ -614,14 +1052,19 @@ class tech_library
   std::vector<gate> const _gates;    /* collection of gates */
   super_lib const& _supergates_spec; /* collection of supergates declarations */
   tech_library_params const _ps;
-  super_utils<NInputs> _super; /* supergates generation */
-  lib_t _super_lib;            /* library of enumerated gates */
-};                             /* class tech_library */
+
+  super_utils<NInputs> _super;     /* supergates generation */
+  struct_library<NInputs> _struct; /* library for structural matching */
+  lib_t _super_lib;                /* library of enumerated gates */
+  multi_lib_t _multi_lib;          /* library of enumerated multioutput gates */
+  multi_func_t _multi_funcs;       /* enumerated functions for multioutput gates */
+  struct_lib_t _struct_lib;        /* library of gates for patterns IDs */
+};                                 /* class tech_library */
 
 template<typename Ntk, unsigned NInputs>
 struct exact_supergate
 {
-  signal<Ntk> const root;
+  signal<Ntk> root;
 
   /* number of inputs of the supergate */
   uint8_t n_inputs{ 0 };
@@ -652,6 +1095,8 @@ struct exact_library_params
 
   /* classify in NP instead of NPN */
   bool np_classification{ true };
+  /* Compute DC classes for matching with don't cares */
+  bool compute_dc_classes{ false };
   /* verbose */
   bool verbose{ false };
 };
@@ -678,16 +1123,22 @@ template<typename Ntk, class RewritingFn, unsigned NInputs = 4u>
 class exact_library
 {
   using supergates_list_t = std::vector<exact_supergate<Ntk, NInputs>>;
-  using tt_hash = kitty::hash<kitty::static_truth_table<NInputs>>;
-  using lib_t = std::unordered_map<kitty::static_truth_table<NInputs>, supergates_list_t, tt_hash>;
+  using TT = kitty::static_truth_table<NInputs>;
+  using tt_hash = kitty::hash<TT>;
+  using lib_t = std::unordered_map<TT, supergates_list_t, tt_hash>;
+  using dc_transformation_t = std::tuple<supergates_list_t const*, uint32_t, std::array<uint8_t, NInputs>>;
+  using dc_t = std::pair<TT, dc_transformation_t>;
+  using dc_lib_t = std::unordered_map<TT, std::vector<dc_t>, tt_hash>;
 
 public:
   explicit exact_library( RewritingFn const& rewriting_fn, exact_library_params const& ps = {} )
       : _database(),
         _rewriting_fn( rewriting_fn ),
         _ps( ps ),
-        _super_lib()
+        _super_lib(),
+        _dc_lib()
   {
+    _super_lib.reserve( 222 ); /* NOTE(review): presumably the number of NPN classes of 4-input functions (default NInputs) -- confirm */
     generate_library();
   }
 
@@ -696,7 +1147,7 @@ class exact_library
    * Returns a list of graph structures that match the function
    * represented by the truth table.
    */
-  const supergates_list_t* get_supergates( kitty::static_truth_table<NInputs> const& tt ) const
+  const supergates_list_t* get_supergates( TT const& tt ) const
   {
     auto match = _super_lib.find( tt );
     if ( match != _super_lib.end() )
@@ -704,6 +1155,60 @@ class exact_library
     return nullptr;
   }
 
+  /*! \brief Get the structures matching the function with DC.
+   *
+   * Returns a list of graph structures that match the function
+   * represented by the truth table and its don't care set.
+   * This function also updates the phase and permutation vector
+   * of the original NPN class to the new one obtained using
+   * don't cares.
+   */
+  const supergates_list_t* get_supergates( TT const& tt, TT const& dc, uint32_t& phase, std::vector<uint8_t>& perm ) const
+  {
+    auto match = _super_lib.find( tt );
+    if ( match == _super_lib.end() )
+      return nullptr;
+
+    /* lookup for don't care optimization */
+    auto match_dc = _dc_lib.find( tt );
+    if ( dc._bits == 0 || match_dc == _dc_lib.end() ) /* empty DC set or no DC entry for tt: plain match */
+      return &match->second;
+
+    for ( auto const& entry : match_dc->second )
+    {
+      auto const& dc_entry_tt = std::get<0>( entry );
+
+      /* check for containment: every bit required by the entry must be in dc */
+      if ( ( dc & dc_entry_tt ) == dc_entry_tt )
+      {
+        auto const& dc_entry = std::get<1>( entry );
+
+        /* update phase and perm */
+        uint32_t dc_entry_phase = std::get<1>( dc_entry );
+        auto const& dc_entry_perm = std::get<2>( dc_entry );
+        std::vector<uint8_t> temp_perm( perm.size() );
+        uint32_t temp_phase = dc_entry_phase & ( 1 << NInputs ); /* keep bit NInputs as is */
+        for ( auto i = 0u; i < NInputs; ++i )
+        {
+          temp_perm[dc_entry_perm[i]] = perm[i];
+          temp_phase |= ( ( dc_entry_phase >> i ) & 1 ) << perm[i]; /* move phase bit i to position perm[i] */
+        }
+        phase ^= temp_phase;
+        std::copy( temp_perm.begin(), temp_perm.end(), perm.begin() );
+        return std::get<0>( dc_entry ); /* the first contained entry wins */
+      }
+    }
+
+    /* no don't care optimization found */
+    return &match->second;
+  }
+
+  /*! \brief Returns the NPN database of structures (non-const access). */
+  Ntk& get_database()
+  {
+    return _database;
+  }
+
   /*! \brief Returns the NPN database of structures. */
   const Ntk& get_database() const
   {
@@ -729,8 +1234,8 @@ class exact_library
     }
 
     /* Compute NPN classes */
-    std::unordered_set<kitty::static_truth_table<NInputs>, tt_hash> classes;
-    kitty::static_truth_table<NInputs> tt;
+    std::unordered_set<TT, tt_hash> classes;
+    TT tt;
     do
     {
       const auto res = kitty::exact_npn_canonization( tt );
@@ -738,7 +1243,7 @@ class exact_library
       kitty::next_inplace( tt );
     } while ( !kitty::is_const0( tt ) );
 
-    /* Construct supergates */
+    /* Construct supergates */
     for ( auto const& entry : classes )
     {
       supergates_list_t supergates_pos;
@@ -769,11 +1274,24 @@ class exact_library
       kitty::dynamic_truth_table function = kitty::extend_to( entry, NInputs );
       _rewriting_fn( _database, function, pis.begin(), pis.end(), add_supergate );
       if ( supergates_pos.size() > 0 )
+      {
+        std::sort( supergates_pos.begin(), supergates_pos.end(), [&]( auto const& a, auto const& b ) {
+          return a.area < b.area;
+        } );
         _super_lib.insert( { entry, supergates_pos } );
+      }
       if ( _ps.np_classification && supergates_neg.size() > 0 )
+      {
+        std::sort( supergates_neg.begin(), supergates_neg.end(), [&]( auto const& a, auto const& b ) {
+          return a.area < b.area;
+        } );
         _super_lib.insert( { not_entry, supergates_neg } );
+      }
     }
 
+    if ( _ps.compute_dc_classes )
+      compute_dont_cares_classes();
+
     if ( _ps.verbose )
     {
       std::cout << "Classified in " << _super_lib.size() << " entries" << std::endl;
@@ -861,11 +1379,139 @@ class exact_library
     return area;
   }
 
+  void compute_dont_cares_classes()
+  { /* fills _dc_lib: for each class in _super_lib, the DC sets that turn it into a class of smaller size */
+    /* save the size for each NPN class */
+    std::unordered_map<TT, uint32_t, tt_hash> class_sizes;
+    for ( auto const& entry : _super_lib )
+    {
+      const unsigned numgates = static_cast<unsigned>( std::get<1>( entry ).front().area );
+      class_sizes.insert( { std::get<0>( entry ), numgates } );
+    }
+
+    uint32_t conflict_found = 0;
+    uint32_t total_exploration = 0;
+
+    /* find don't care links */
+    for ( auto entry_i = class_sizes.begin(); entry_i != class_sizes.end(); ++entry_i )
+    {
+      auto const& tt_i = std::get<0>( *entry_i );
+      auto const current_size = std::get<1>( *entry_i );
+
+      /* use a map to link the dont cares to the new size, NPN class, negations, and permutation vector */
+      using dc_transf_t = std::tuple<uint32_t, TT, uint32_t, std::vector<uint8_t>>;
+      std::unordered_map<TT, dc_transf_t, tt_hash> dc_sets;
+
+      for ( auto entry_j = class_sizes.begin(); entry_j != class_sizes.end(); ++entry_j )
+      {
+        auto const& tt_j = std::get<0>( *entry_j );
+        uint32_t size = std::get<1>( *entry_j );
+
+        /* evaluate DC only for size improvement */
+        if ( size >= current_size )
+          continue;
+
+        /* skip the same NPN class if gates are constructed in NP classes */
+        if ( _ps.np_classification && tt_i == ~tt_j )
+          continue;
+
+        exact_npn_enumeration( tt_j, [&]( auto const& tt, uint32_t phase, std::vector<uint8_t> const& perm ) {
+          /* extract the DC set */
+          const auto dc = tt_i ^ tt;
+
+          /* limit the explosion of DC combinations to evaluate */
+          // if ( kitty::count_ones( dc ) > 3 )
+          //   return;
+
+          ++total_exploration;
+
+          /* permute phase: done up front so that every entry stored in dc_sets uses the same encoding */
+          uint32_t phase_perm = phase & ( 1 << NInputs );
+          for ( auto i = 0u; i < NInputs; ++i )
+          {
+            phase_perm |= ( ( phase >> perm[i] ) & 1 ) << i;
+          }
+
+          /* check existence: filters ~12% of conflicts */
+          if ( auto const& p = dc_sets.find( dc ); p != dc_sets.end() )
+          {
+            if ( size < std::get<0>( std::get<1>( *p ) ) )
+              dc_sets[dc] = std::make_tuple( size, tt_j, phase_perm, perm );
+
+            ++conflict_found;
+            return;
+          }
+
+          /* check dominance */
+          auto it = dc_sets.begin();
+          while ( it != dc_sets.end() )
+          {
+            auto const& dc_set_tt = std::get<0>( *it );
+            auto const& and_tt = dc_set_tt & dc;
+
+            if ( dc_set_tt == and_tt && std::get<0>( std::get<1>( *it ) ) <= size ) /* a subset of dc already gives a size not larger */
+            {
+              return;
+            }
+            else if ( dc == and_tt && size <= std::get<0>( std::get<1>( *it ) ) ) /* dc is a subset of the entry and not worse in size */
+            {
+              it = dc_sets.erase( it );
+            }
+            else
+            {
+              ++it;
+            }
+          }
+
+          /* insert in the dc_sets */
+          dc_sets[dc] = std::make_tuple( size, tt_j, phase_perm, perm );
+        } );
+      }
+
+      /* add entries to the main data structure */
+      std::vector<dc_t> dc_transformations;
+      dc_transformations.reserve( dc_sets.size() );
+
+      std::array<uint8_t, NInputs> permutation;
+
+      /* insert in a sorted way based on gain */
+      /* TODO: optimize to reduce the number of cycles */
+      for ( auto i = 0u; i < std::get<1>( *entry_i ); ++i )
+      {
+        for ( auto const& dc : dc_sets )
+        {
+          auto const& transf = std::get<1>( dc );
+
+          if ( std::get<0>( transf ) != i ) /* one pass per target size i, smallest first */
+          {
+            continue;
+          }
+
+          supergates_list_t const* sg = &_super_lib[std::get<1>( transf )];
+          auto const& perm = std::get<3>( transf );
+
+          assert( perm.size() == NInputs );
+
+          for ( auto j = 0u; j < NInputs; ++j )
+          {
+            permutation[j] = perm[j];
+          }
+
+          dc_transformations.emplace_back( std::make_pair( std::get<0>( dc ), std::make_tuple( sg, std::get<2>( transf ), permutation ) ) );
+        }
+      }
+
+      if ( !dc_transformations.empty() )
+        _dc_lib.insert( { tt_i, dc_transformations } );
+    }
+  }
+
 private:
   Ntk _database;
   RewritingFn const& _rewriting_fn;
   exact_library_params const _ps;
   lib_t _super_lib;
+  dc_lib_t _dc_lib;
 }; /* class exact_library */
 
 } // namespace mockturtle
diff --git a/include/mockturtle/utils/window_utils.hpp b/include/mockturtle/utils/window_utils.hpp
index d74f64d0c..d23636c3e 100644
--- a/include/mockturtle/utils/window_utils.hpp
+++ b/include/mockturtle/utils/window_utils.hpp
@@ -1,5 +1,5 @@
 /* mockturtle: C++ logic network library
- * Copyright (C) 2018-2022  EPFL
+ * Copyright (C) 2018-2023  EPFL
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -33,6 +33,7 @@
 #pragma once
 
 #include <algorithm>
+#include <optional>
 #include <set>
 #include <type_traits>
 #include <vector>
@@ -55,6 +56,8 @@ inline void collect_nodes_recur( Ntk const& ntk, typename Ntk::node const& n, st
   ntk.paint( n );
 
   ntk.foreach_fanin( n, [&]( signal const& fi ) {
+    if ( ntk.is_constant( ntk.get_node( fi ) ) )
+      return;
     collect_nodes_recur( ntk, ntk.get_node( fi ), nodes );
   } );
   nodes.push_back( n );
@@ -198,36 +201,6 @@ std::vector<typename Ntk::node> collect_inputs( Ntk const& ntk, std::vector<type
   return inputs;
 }
 
-/*! \brief Collect all nodes supported by a cut
- *
- * Network is assumed to be topologically sorted (e.g. just created or wrapped with topo_view)
- */
-template<typename Ntk>
-inline std::vector<typename Ntk::node> collect_supported( Ntk const& ntk, std::vector<typename Ntk::node> const& cut )
-{
-  ntk.new_color();
-  for ( auto const& n : cut )
-  {
-    ntk.paint( n );
-  }
-
-  std::vector<typename Ntk::node> nodes;
-  ntk.foreach_gate( [&]( auto const& n ){
-    if ( ntk.eval_color( n, [&ntk]( auto c ){ return c == ntk.current_color(); } ) )
-    {
-      return true;
-    }
-    if ( ntk.eval_fanins_color( n, [&ntk]( auto c ){ return c == ntk.current_color(); } ) )
-    {
-      ntk.paint( n );
-      nodes.emplace_back( n );
-    }
-    return true;
-  });
-
-  return nodes;
-}
-
 /*! \brief Identify outputs using reference counting
  *
  * Identify outputs using a reference counting approach.  The
@@ -255,16 +228,6 @@ inline std::vector<typename Ntk::node> collect_supported( Ntk const& ntk, std::v
  * - `is_constant`
  * - `make_signal`
  */
-template<typename Ntk>
-inline std::vector<typename Ntk::signal> collect_outputs( Ntk const& ntk,
-                                                          std::vector<typename Ntk::node> const& inputs,
-                                                          std::vector<typename Ntk::node> const& nodes )
-{
-  std::vector<uint32_t> refs;
-  refs.resize( ntk.size() );
-  return collect_outputs( ntk, inputs, nodes );
-}
-
 template<typename Ntk>
 inline std::vector<typename Ntk::signal> collect_outputs( Ntk const& ntk,
                                                           std::vector<typename Ntk::node> const& inputs,
diff --git a/include/mockturtle/views/cell_view.hpp b/include/mockturtle/views/cell_view.hpp
new file mode 100644
index 000000000..24adeb1f0
--- /dev/null
+++ b/include/mockturtle/views/cell_view.hpp
@@ -0,0 +1,297 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2023  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file cell_view.hpp
+  \brief Implements methods to bind the network to a standard cell library
+
+  \author Alessandro Tempia Calvino
+*/
+
+#pragma once
+
+#include "../utils/node_map.hpp"
+#include "../utils/standard_cell.hpp"
+#include "../views/topo_view.hpp"
+
+#include <iostream>
+#include <map>
+
+namespace mockturtle
+{
+
+/*! \brief Adds cells to a technology library and mapping API methods.
+ *
+ * This view adds methods to create and manage a mapped network that
+ * implements cells contained in a technology library. This view
+ * is returned by the technology mapping command `map`. It can be used
+ * to report statistics about the network and write the network into
+ * a verilog file. It always adds the functions `has_cell`,
+ * `remove_cell`, `add_cell`, `add_cell_with_check`, `get_cell`,
+ * `get_cell_index`, `get_library`, `compute_area`, `compute_worst_delay`,
+ * `report_stats`, and `report_cells_usage`.
+ *
+ * **Required network functions:**
+ * - `size`
+ * - `foreach_node`
+ * - `foreach_fanin`
+ * - `is_constant`
+ * - `is_pi`
+ *
+ * Example
+ *
+   \verbatim embed:rst
+
+   .. code-block:: c++
+
+      // create network somehow
+      aig_network aig = ...;
+
+      // read cell library in genlib format
+      std::vector<gate> gates;
+      lorina::read_genlib( "file.genlib", genlib_reader( gates ) );
+      tech_library tech_lib( gates );
+
+      // call technology mapping to obtain the view
+      cell_view<block_network> res = emap_block( aig, tech_lib );
+
+      // prints stats and cells usage
+      res.report_stats();
+      res.report_cells_usage();
+   \endverbatim
+ */
+template<class Ntk>
+class cell_view : public Ntk
+{
+public:
+  using node = typename Ntk::node;
+  using signal = typename Ntk::signal;
+
+public:
+  explicit cell_view( std::vector<standard_cell> const& library ) /* empty network bound to a copy of library */
+      : Ntk(), _library{ library }, _cells( *this )
+  {
+    static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+    static_assert( has_foreach_node_v<Ntk>, "Ntk does not implement the foreach_node method" );
+    static_assert( has_foreach_fanin_v<Ntk>, "Ntk does not implement the foreach_fanin method" );
+    static_assert( has_is_constant_v<Ntk>, "Ntk does not implement the is_constant method" );
+    static_assert( has_is_pi_v<Ntk>, "Ntk does not implement the is_pi method" );
+  }
+
+  explicit cell_view( Ntk const& ntk, std::vector<standard_cell> const& library ) /* view over ntk; no node has a cell yet */
+      : Ntk( ntk ), _library{ library }, _cells( *this )
+  {
+    static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+    static_assert( has_foreach_node_v<Ntk>, "Ntk does not implement the foreach_node method" );
+    static_assert( has_foreach_fanin_v<Ntk>, "Ntk does not implement the foreach_fanin method" );
+    static_assert( has_is_constant_v<Ntk>, "Ntk does not implement the is_constant method" );
+    static_assert( has_is_pi_v<Ntk>, "Ntk does not implement the is_pi method" );
+  }
+
+  cell_view<Ntk>& operator=( cell_view<Ntk> const& cell_ntk )
+  {
+    Ntk::operator=( cell_ntk );
+    _library = cell_ntk._library; /* requires _library to be non-const */
+    _cells = cell_ntk._cells;
+    return *this;
+  }
+
+  void add_cell( node const& n, uint32_t cell_id ) /* binds n to the library cell cell_id without any functional check */
+  {
+    assert( cell_id < _library.size() );
+    _cells[n] = cell_id;
+  }
+
+  bool add_cell_with_check( node const& n, uint32_t cell_id ) /* binds n only if its function(s) equal the cell's; returns success */
+  {
+    assert( cell_id < _library.size() );
+
+    auto const& cell = _library[cell_id];
+
+    if constexpr ( has_num_outputs_v<Ntk> && has_node_function_v<Ntk> )
+    {
+      if ( Ntk::num_outputs( n ) != cell.gates.size() )
+        return false;
+
+      for ( uint32_t i = 0; i < Ntk::num_outputs( n ); ++i )
+      {
+        if ( Ntk::node_function_pin( n, i ) != cell.gates[i].function )
+        {
+          return false;
+        }
+      }
+
+      _cells[n] = cell_id;
+      return true;
+    }
+
+    if ( cell.gates.size() > 1 ) /* single-output network: a multi-output cell cannot be bound */
+      return false;
+
+    if ( Ntk::node_function( n ) == cell.gates[0].function )
+    {
+      _cells[n] = cell_id;
+      return true;
+    }
+
+    return false;
+  }
+
+  void remove_cell( node const& n ) const /* NOTE(review): const, yet it erases from _cells -- confirm node_map::erase is const-callable */
+  {
+    _cells.erase( n );
+  }
+
+  const standard_cell& get_cell( node const& n ) const
+  {
+    return _library[_cells[n]];
+  }
+
+  bool has_cell( node const& n ) const
+  {
+    return _cells.has( n );
+  }
+
+  unsigned int get_cell_index( node const& n ) const
+  {
+    return _cells[n];
+  }
+
+  const std::vector<standard_cell>& get_library() const
+  {
+    return _library;
+  }
+
+  double compute_area() const /* sum of the cell areas over the nodes that have a cell */
+  {
+    double area = 0;
+    Ntk::foreach_node( [&]( auto const& n, auto ) {
+      if ( has_cell( n ) )
+      {
+        area += get_cell( n ).area;
+      }
+    } );
+
+    return area;
+  }
+
+  double compute_worst_delay() const /* largest output arrival time, or -1 if a non-PI, non-constant node has no cell */
+  {
+    topo_view ntk_topo{ *this };
+    std::vector<std::vector<double>> delays( Ntk::size() ); /* delays[n][p]: arrival time at output pin p of node n */
+    double worst_delay = 0;
+
+    ntk_topo.foreach_node( [&]( auto const& n, auto ) {
+      if ( Ntk::is_constant( n ) || Ntk::is_pi( n ) )
+      {
+        delays[n].push_back( 0 );
+        return true;
+      }
+
+      if ( has_cell( n ) )
+      {
+        auto const& cell = get_cell( n );
+
+        for ( gate const& g : cell.gates ) /* one arrival time per gate (output) of the cell */
+        {
+          double cell_delay = 0;
+          if constexpr ( has_get_output_pin_v<Ntk> )
+          {
+            Ntk::foreach_fanin( n, [&]( signal const& f, auto i ) {
+              cell_delay = std::max( cell_delay, delays[Ntk::get_node( f )][Ntk::get_output_pin( f )] + std::max( g.pins[i].rise_block_delay, g.pins[i].fall_block_delay ) );
+            } );
+          }
+          else
+          {
+            Ntk::foreach_fanin( n, [&]( signal const& f, auto i ) {
+              cell_delay = std::max( cell_delay, delays[Ntk::get_node( f )].front() + std::max( g.pins[i].rise_block_delay, g.pins[i].fall_block_delay ) );
+            } );
+          }
+          delays[n].push_back( cell_delay );
+          worst_delay = std::max( worst_delay, cell_delay );
+        }
+      }
+      else
+      {
+        worst_delay = -1; /* node without a cell: report -1 and stop the traversal */
+        return false;
+      }
+      return true;
+    } );
+
+    return worst_delay;
+  }
+
+  void report_stats( std::ostream& os = std::cout ) const
+  {
+    os << fmt::format( "[i] Report stats: area = {:>5.2f}; delay = {:>5.2f};\n", compute_area(), compute_worst_delay() );
+  }
+
+  void report_cells_usage( std::ostream& os = std::cout ) const /* per-cell instance count, total area, and area share */
+  {
+    std::vector<uint32_t> cells_profile( _library.size(), 0u );
+
+    double area = 0;
+    Ntk::foreach_node( [&]( node const& n, auto ) {
+      if ( has_cell( n ) )
+      {
+        auto const& g = get_cell( n );
+        ++cells_profile[g.id]; /* NOTE(review): assumes cell ids index _library -- confirm */
+        area += g.area;
+      }
+    } );
+
+    os << "[i] Report cells usage:\n";
+
+    uint32_t tot_instances = 0u;
+    for ( auto i = 0u; i < cells_profile.size(); ++i )
+    {
+      if ( cells_profile[i] > 0u )
+      {
+        float tot_cell_area = cells_profile[i] * _library[i].area;
+
+        os << fmt::format( "[i] {:<25}", _library[i].name )
+           << fmt::format( "\t Instance = {:>10d}", cells_profile[i] )
+           << fmt::format( "\t Area = {:>12.2f}", tot_cell_area )
+           << fmt::format( " {:>8.2f} %\n", tot_cell_area / area * 100 );
+
+        tot_instances += cells_profile[i];
+      }
+    }
+
+    os << fmt::format( "[i] {:<25}", "TOTAL" )
+       << fmt::format( "\t Instance = {:>10d}", tot_instances )
+       << fmt::format( "\t Area = {:>12.2f}   100.00 %\n", area );
+  }
+
+private:
+  std::vector<standard_cell> _library; /* own copy of the cell library; non-const so that operator= can assign it */
+  node_map<uint32_t, Ntk, std::unordered_map<node, uint32_t>> _cells;
+}; /* cell_view */
+
+template<class T> /* deduction guide matching the ( network, library ) constructor */
+cell_view( T const&, std::vector<standard_cell> const& ) -> cell_view<T>;
+
+} // namespace mockturtle
\ No newline at end of file
diff --git a/include/mockturtle/views/choice_view.hpp b/include/mockturtle/views/choice_view.hpp
index d2bf6264e..8824bfbc7 100644
--- a/include/mockturtle/views/choice_view.hpp
+++ b/include/mockturtle/views/choice_view.hpp
@@ -166,7 +166,8 @@ class choice_view<Ntk, false> : public Ntk
     }
   }
 
-  void add_choice( node const& n1, node const& n2 )
+  template<typename T = Ntk>
+  std::enable_if_t<!std::is_same_v<typename T::node, typename T::signal>, void> add_choice( node const& n1, node const& n2 )
   {
     add_choice( n1, Ntk::make_signal( n2 ) );
   }
@@ -364,7 +365,8 @@ class choice_view<Ntk, false> : public Ntk
     return repr ^ Ntk::is_complemented( _choice_phase->at( Ntk::node_to_index( n ) ) );
   }
 
-  signal get_choice_representative_signal( signal const& sig ) const
+  template<typename T = Ntk>
+  std::enable_if_t<!std::is_same_v<typename T::node, typename T::signal>, typename T::signal> get_choice_representative_signal( signal const& sig ) const
   {
     auto n = Ntk::get_node( sig );
     auto repr = get_choice_representative( n );
diff --git a/include/mockturtle/views/dont_touch_view.hpp b/include/mockturtle/views/dont_touch_view.hpp
new file mode 100644
index 000000000..7f70712c7
--- /dev/null
+++ b/include/mockturtle/views/dont_touch_view.hpp
@@ -0,0 +1,142 @@
+/* mockturtle: C++ logic network library
+ * Copyright (C) 2018-2023  EPFL
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*!
+  \file dont_touch_view.hpp
+  \brief Select nodes to be "don't touch"
+
+  \author Alessandro Tempia Calvino
+*/
+
+#pragma once
+
+#include "../traits.hpp"
+
+#include <type_traits>
+#include <unordered_set>
+
+namespace mockturtle
+{
+
+/*! \brief Mark nodes as "don't touch".
+ *
+ * This view adds methods to mark nodes as don't touch. A don't touch node
+ * will be skipped during logic optimization or mapping.
+ * It always adds the functions `select_dont_touch`, `remove_dont_touch`,
+ * `is_dont_touch`, and `foreach_dont_touch`.
+ *
+ * **Required network functions:**
+ * - `node_to_index`, `index_to_node`
+ *
+ * Example
+ *
+   \verbatim embed:rst
+
+   .. code-block:: c++
+
+      // create network somehow
+      klut_network klut = ...;
+      dont_touch_view klut_dont_touch{ klut };
+
+      // select dont touch nodes
+      klut_dont_touch.select_dont_touch( 20 );
+
+      // call technology mapping to map the rest of the network
+      binding_view<klut_network> res = emap( klut_dont_touch, tech_lib );
+   \endverbatim
+ */
+template<class Ntk>
+class dont_touch_view : public Ntk
+{
+public:
+  using node = typename Ntk::node;
+
+public:
+  explicit dont_touch_view()
+      : Ntk(), _dont_touch()
+  {
+    static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+    static_assert( has_node_to_index_v<Ntk>, "Ntk does not implement the node_to_index method" );
+  }
+
+  explicit dont_touch_view( Ntk const& ntk )
+      : Ntk( ntk ), _dont_touch()
+  {
+    static_assert( is_network_type_v<Ntk>, "Ntk is not a network type" );
+    static_assert( has_node_to_index_v<Ntk>, "Ntk does not implement the node_to_index method" );
+  }
+
+  dont_touch_view<Ntk>& operator=( dont_touch_view<Ntk> const& dont_touch_ntk )
+  {
+    Ntk::operator=( dont_touch_ntk );
+    _dont_touch = dont_touch_ntk._dont_touch;
+    return *this;
+  }
+
+  /*! \brief Marks node `n` as don't touch. */
+  void select_dont_touch( node const& n )
+  {
+    _dont_touch.insert( Ntk::node_to_index( n ) );
+  }
+
+  /*! \brief Clears the don't touch mark of node `n` (no-op if `n` is not marked). */
+  void remove_dont_touch( node const& n )
+  {
+    _dont_touch.erase( Ntk::node_to_index( n ) );
+  }
+
+  /*! \brief Returns true if node `n` is marked as don't touch. */
+  bool is_dont_touch( node const& n ) const
+  {
+    return _dont_touch.find( Ntk::node_to_index( n ) ) != _dont_touch.end();
+  }
+
+  /*! \brief Calls `fn` on every don't touch node (unspecified order); a `fn` returning false stops the iteration. */
+  template<typename Fn>
+  void foreach_dont_touch( Fn&& fn ) const
+  {
+    for ( auto el : _dont_touch )
+    {
+      if constexpr ( std::is_invocable_r_v<bool, Fn, node> )
+      {
+        if ( !fn( Ntk::index_to_node( el ) ) )
+          return;
+      }
+      else
+      {
+        fn( Ntk::index_to_node( el ) );
+      }
+    }
+  }
+
+private:
+  /* indices (`node_to_index`) of the nodes marked as don't touch */
+  std::unordered_set<uint32_t> _dont_touch;
+}; /* dont_touch_view */
+
+template<class T>
+dont_touch_view( T const& ) -> dont_touch_view<T>;
+
+} // namespace mockturtle
diff --git a/lib/kitty/kitty/npn.hpp b/lib/kitty/kitty/npn.hpp
index 58c0fdbf0..fefad2484 100644
--- a/lib/kitty/kitty/npn.hpp
+++ b/lib/kitty/kitty/npn.hpp
@@ -27,6 +27,7 @@
   \file npn.hpp
   \brief Implements NPN canonization algorithms
 
+  \author Alessandro Tempia Calvino
   \author Mathias Soeken
 */
 
@@ -334,6 +335,81 @@ std::tuple<TT, uint32_t> exact_n_canonization( const TT& tt, Callback&& fn = det
   return std::make_tuple( tmin, phase );
 }
 
+/*! \brief Exact N canonization given a support size
+
+  Given a truth table, this function finds the lexicographically smallest truth
+  table in its N class, called N representative. Two functions are in the
+  same N class, if one can be obtained from the other by input negations.
+
+  The function can accept a callback as third parameter which is called for
+  every visited function when trying out all combinations.  This allows to
+  exhaustively visit the whole N class.
+
+  The function returns an N configuration which contains the necessary
+  transformations to obtain the representative.  It is a tuple of
+
+  - the N representative
+  - input negations that lead to the representative
+
+  \param tt The truth table
+  \param support_size Support size used for the canonization (at most `tt.num_vars()` and at most 6; for 0 or 1, `tt` is returned as is and `fn` is not called)
+  \param fn Callback for each visited truth table in the class (default does nothing)
+  \return N configurations
+*/
+template<typename TT, typename Callback = decltype( detail::exact_npn_canonization_null_callback<TT> )>
+std::tuple<TT, uint32_t> exact_n_canonization_support( const TT& tt, uint32_t support_size, Callback&& fn = detail::exact_npn_canonization_null_callback<TT> )
+{
+  static_assert( is_complete_truth_table<TT>::value, "Can only be applied on complete truth tables." );
+
+  assert( support_size <= tt.num_vars() );
+
+  const auto num_vars = support_size;
+
+  /* Special case for n = 0 */
+  if ( num_vars == 0 )
+  {
+    return std::make_tuple( tt, 0 );
+  }
+
+  /* Special case for n = 1 */
+  if ( num_vars == 1 )
+  {
+    return std::make_tuple( tt, 0 );
+  }
+
+  assert( num_vars >= 2 && num_vars <= 6 );
+
+  auto t1 = tt;
+  auto tmin = t1;
+
+  fn( t1 );
+
+  const auto& flips = detail::flips[num_vars - 2u];
+  int best_flip = -1; /* index of the last flip that produced tmin; -1 means no flip improved on tt */
+
+  for ( std::size_t j = 0; j < flips.size(); ++j )
+  {
+    const auto pos = flips[j];
+    flip_inplace( t1, pos );
+
+    fn( t1 );
+
+    if ( t1 < tmin )
+    {
+      best_flip = static_cast<int>( j );
+      tmin = t1;
+    }
+  }
+  /* the negation mask is the XOR of all flips applied up to (and including) the best one */
+  uint32_t phase = 0;
+  for ( auto i = 0; i <= best_flip; ++i )
+  {
+    phase ^= 1 << flips[i];
+  }
+
+  return std::make_tuple( tmin, phase );
+}
+
 /*! \brief Flip-swap NPN heuristic
 
   This algorithm will iteratively try to reduce the numeric value of the truth
@@ -613,6 +689,94 @@ std::tuple<TT, uint32_t, std::vector<uint8_t>> sifting_p_canonization( const TT&
   return std::make_tuple( npn, phase, perm );
 }
 
+/*! \brief Exact NPN enumeration
+
+  Given a truth table, this function enumerates all the functions in its
+  NPN class. Two functions are in the same NPN class, if one can be obtained
+  from the other by input negation, input permutation, and output negation.
+
+  The function takes a callback as second parameter which is called for
+  every enumerated function. The callback should take as parameters:
+  - NPN-enumerated truth table
+  - input and output negations (the output negation is bit `num_vars`)
+  - input permutation to apply
+
+  \param tt Truth table
+  \param fn Callback for each enumerated truth table in the NPN class
+*/
+template<typename TT, typename Callback>
+void exact_npn_enumeration( const TT& tt, Callback&& fn )
+{
+  static_assert( is_complete_truth_table<TT>::value, "Can only be applied on complete truth tables." );
+
+  const auto num_vars = tt.num_vars();
+
+  /* Special case for n = 0 */
+  if ( num_vars == 0 )
+  {
+    fn( tt, 0u, std::vector<uint8_t>{} );
+    fn( ~tt, 1u, std::vector<uint8_t>{} );
+    return;
+  }
+
+  /* Special case for n = 1 */
+  if ( num_vars == 1 )
+  {
+    fn( tt, 0u, std::vector<uint8_t>{ 0 } );
+    fn( ~tt, 2u, std::vector<uint8_t>{ 0 } );
+    return;
+  }
+
+  assert( num_vars >= 2 && num_vars <= 6 );
+
+  auto t1 = tt;
+  uint32_t phase = 0;
+
+  std::vector<uint8_t> perm( num_vars );
+  std::iota( perm.begin(), perm.end(), 0u );
+
+  fn( t1, phase, perm );
+  fn( ~t1, phase | ( 1u << num_vars ), perm ); /* output-negated identity configuration */
+
+  const auto& swaps = detail::swaps[num_vars - 2u];
+  const auto& flips = detail::flips[num_vars - 2u];
+
+  for ( std::size_t i = 0; i < swaps.size(); ++i )
+  {
+    const auto pos = swaps[i];
+    swap_adjacent_inplace( t1, pos );
+
+    std::swap( perm[pos], perm[pos + 1] );
+
+    fn( t1, phase, perm );
+    fn( ~t1, phase | ( 1u << num_vars ), perm );
+  }
+
+  for ( std::size_t j = 0; j < flips.size(); ++j )
+  {
+    const auto pos = flips[j];
+    swap_adjacent_inplace( t1, 0 );
+    flip_inplace( t1, pos );
+
+    std::swap( perm[0], perm[1] );
+    phase ^= 1 << perm[pos];
+
+    fn( t1, phase, perm );
+    fn( ~t1, phase | ( 1u << num_vars ), perm );
+
+    for ( std::size_t i = 0; i < swaps.size(); ++i )
+    {
+      const auto pos = swaps[i];
+      swap_adjacent_inplace( t1, pos );
+
+      std::swap( perm[pos], perm[pos + 1] );
+
+      fn( t1, phase, perm );
+      fn( ~t1, phase | ( 1u << num_vars ), perm );
+    }
+  }
+}
+
 /*! \brief Exact NP enumeration
 
   Given a truth table, this function enumerates all the functions in its
@@ -696,6 +860,102 @@ void exact_np_enumeration( const TT& tt, Callback&& fn )
   }
 }
 
+/*! \brief Exact multi NP enumeration
+
+  Given multiple truth tables, this function enumerates all the functions in their
+  NP class. Two functions are in the same NP class, if one can be obtained
+  from the other by input negation and input permutation.
+
+  The function takes a callback as second parameter which is called for
+  every enumerated function. The callback should take as parameters:
+  - NP-enumerated truth tables
+  - input negations
+  - input permutation to apply
+
+  \param tts Truth tables (non-empty, all with the same number of variables)
+  \param fn Callback for each enumerated truth table in the NP class (with 0 or 1 variables only the unchanged tables are reported)
+*/
+template<typename TT, typename Callback>
+void exact_multi_np_enumeration( const std::vector<TT>& tts, Callback&& fn )
+{
+  static_assert( is_complete_truth_table<TT>::value, "Can only be applied on complete truth tables." );
+
+  assert( tts.size() > 0 );
+
+  const auto num_vars = tts[0].num_vars();
+
+  for ( std::size_t i = 0; i < tts.size(); ++i )
+    assert( tts[i].num_vars() == num_vars );
+
+  /* Special case for n = 0 */
+  if ( num_vars == 0 )
+  {
+    fn( tts, 0u, std::vector<uint8_t>{} );
+    return;
+  }
+
+  /* Special case for n = 1 */
+  if ( num_vars == 1 )
+  {
+    fn( tts, 0u, std::vector<uint8_t>{ 0 } );
+    return;
+  }
+
+  assert( num_vars >= 2 && num_vars <= 6 );
+
+  auto t1 = tts;
+
+  std::vector<uint8_t> perm( num_vars );
+  std::iota( perm.begin(), perm.end(), 0u );
+
+  uint32_t phase = 0;
+
+  fn( t1, phase, perm );
+
+  const auto& swaps = detail::swaps[num_vars - 2u];
+  const auto& flips = detail::flips[num_vars - 2u];
+
+  for ( std::size_t i = 0; i < swaps.size(); ++i )
+  {
+    const auto pos = swaps[i];
+
+    for ( auto& tt : t1 )
+      swap_adjacent_inplace( tt, pos );
+
+    std::swap( perm[pos], perm[pos + 1] );
+
+    fn( t1, phase, perm );
+  }
+
+  for ( std::size_t j = 0; j < flips.size(); ++j )
+  {
+    const auto pos = flips[j];
+
+    for ( auto& tt : t1 )
+    {
+      swap_adjacent_inplace( tt, 0 );
+      flip_inplace( tt, pos );
+    }
+
+    std::swap( perm[0], perm[1] );
+    phase ^= 1 << perm[pos];
+
+    fn( t1, phase, perm );
+
+    for ( std::size_t i = 0; i < swaps.size(); ++i )
+    {
+      const auto pos = swaps[i];
+
+      for ( auto& tt : t1 )
+        swap_adjacent_inplace( tt, pos );
+
+      std::swap( perm[pos], perm[pos + 1] );
+
+      fn( t1, phase, perm );
+    }
+  }
+}
+
 /*! \brief Exact P enumeration
 
   Given a truth table, this function enumerates all the functions in its
@@ -753,6 +1013,70 @@ void exact_p_enumeration( const TT& tt, Callback&& fn )
   }
 }
 
+/*! \brief Exact multi P enumeration
+
+  Given multiple truth tables, this function enumerates all the functions in their
+  P class. Two functions are in the same P class, if one can be obtained
+  from the other by input permutation.
+
+  The function takes a callback as second parameter which is called for
+  every enumerated function. The callback should take as parameters:
+  - P-enumerated truth tables
+  - input permutation to apply
+
+  \param tts Truth tables (non-empty, all with the same number of variables)
+  \param fn Callback for each enumerated truth table in the P class
+*/
+template<typename TT, typename Callback>
+void exact_multi_p_enumeration( const std::vector<TT>& tts, Callback&& fn )
+{
+  static_assert( is_complete_truth_table<TT>::value, "Can only be applied on complete truth tables." );
+
+  assert( tts.size() > 0 );
+
+  const auto num_vars = tts[0].num_vars();
+
+  for ( std::size_t i = 0; i < tts.size(); ++i )
+    assert( tts[i].num_vars() == num_vars );
+
+  /* Special case for n = 0 */
+  if ( num_vars == 0 )
+  {
+    fn( tts, std::vector<uint8_t>{} );
+    return;
+  }
+
+  /* Special case for n = 1 */
+  if ( num_vars == 1 )
+  {
+    fn( tts, std::vector<uint8_t>{ 0 } );
+    return;
+  }
+
+  assert( num_vars >= 2 && num_vars <= 6 );
+
+  auto t1 = tts;
+
+  std::vector<uint8_t> perm( num_vars );
+  std::iota( perm.begin(), perm.end(), 0u );
+
+  fn( t1, perm );
+
+  const auto& swaps = detail::swaps[num_vars - 2u];
+
+  for ( std::size_t i = 0; i < swaps.size(); ++i )
+  {
+    const auto pos = swaps[i];
+
+    for ( auto& tt : t1 )
+      swap_adjacent_inplace( tt, pos );
+
+    std::swap( perm[pos], perm[pos + 1] );
+
+    fn( t1, perm );
+  }
+}
+
 /*! \brief Exact N enumeration
 
   Given a truth table, this function enumerates all the functions in its
@@ -943,4 +1267,61 @@ TT create_from_npn_config( const std::tuple<TT, uint32_t, std::vector<uint8_t>>&
   return res;
 }
 
+/*! \brief Obtain truth table applying an NPN configuration
+
+  Given an NPN configuration composed of input/output negations
+  and input permutations, this function applies the transformation
+  to the input truth table, e.g., to obtain the NPN representative from
+  its NPN transformation. It is the inverse of `create_from_npn_config`.
+
+  \param from truth table
+  \param phase negations to apply (bit `i` negates input `i`, bit `num_vars` negates the output)
+  \param perm input permutation to apply (at least `from.num_vars()` entries)
+  \return transformed truth table
+*/
+template<typename TT>
+TT apply_npn_transformation( TT const& from, uint32_t phase, std::vector<uint8_t> const& perm )
+{
+  static_assert( is_complete_truth_table<TT>::value, "Can only be applied on complete truth tables." );
+
+  const auto num_vars = from.num_vars();
+  assert( perm.size() >= num_vars );
+
+  /* transpose the permutation vector */
+  std::vector<uint8_t> perm_transposed( perm.size() );
+  for ( std::size_t i = 0; i < perm.size(); ++i )
+    perm_transposed[perm[i]] = static_cast<uint8_t>( i );
+
+  /* is output complemented? */
+  auto res = ( ( phase >> num_vars ) & 1 ) ? ~from : from;
+
+  /* input complementations */
+  for ( auto i = 0u; i < num_vars; ++i )
+  {
+    if ( ( phase >> i ) & 1 )
+    {
+      flip_inplace( res, i );
+    }
+  }
+
+  /* input permutations */
+  for ( auto i = 0u; i < num_vars; ++i )
+  {
+    if ( perm_transposed[i] == i )
+    {
+      continue;
+    }
+
+    auto k = i;
+    while ( perm_transposed[k] != i )
+    {
+      ++k;
+    }
+
+    swap_inplace( res, i, k );
+    std::swap( perm_transposed[i], perm_transposed[k] );
+  }
+
+  return res;
+}
+
 } /* namespace kitty */
\ No newline at end of file
diff --git a/test/algorithms/experimental/emap.cpp b/test/algorithms/experimental/emap.cpp
new file mode 100644
index 000000000..b5db438b3
--- /dev/null
+++ b/test/algorithms/experimental/emap.cpp
@@ -0,0 +1,683 @@
+#include <catch.hpp>
+
+#include <cstdint>
+#include <vector>
+
+#include <lorina/genlib.hpp>
+#include <lorina/super.hpp>
+#include <mockturtle/algorithms/experimental/emap.hpp>
+#include <mockturtle/generators/arithmetic.hpp>
+#include <mockturtle/io/genlib_reader.hpp>
+#include <mockturtle/io/super_reader.hpp>
+#include <mockturtle/networks/aig.hpp>
+#include <mockturtle/networks/block.hpp>
+#include <mockturtle/networks/klut.hpp>
+#include <mockturtle/utils/tech_library.hpp>
+#include <mockturtle/views/binding_view.hpp>
+#include <mockturtle/views/cell_view.hpp>
+#include <mockturtle/views/dont_touch_view.hpp>
+
+using namespace mockturtle;
+
+std::string const test_library = "GATE   inv1    1 O=!a;            PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                 "GATE   inv2    2 O=!a;            PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                 "GATE   nand2   2 O=!(a*b);        PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                 "GATE   xor2    5 O=a^b;           PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                 "GATE   mig3    3 O=a*b+a*c+b*c;   PIN * INV 1 999 2.0 0.2 2.0 0.2\n"
+                                 "GATE   buf     2 O=a;             PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
+                                 "GATE   zero    0 O=CONST0;\n"
+                                 "GATE   one     0 O=CONST1;\n"
+                                 "GATE   ha      5 O=a*b;           PIN * INV 1 999 1.7 0.4 1.7 0.4\n"
+                                 "GATE   ha      5 O=!a*b+a*!b;     PIN * INV 1 999 2.1 0.4 2.1 0.4\n"
+                                 "GATE   fa      6 O=a*b+a*c+b*c;   PIN * INV 1 999 2.1 0.4 2.1 0.4\n"
+                                 "GATE   fa      6 O=a^b^c;         PIN * INV 1 999 3.0 0.4 3.0 0.4";
+
+std::string const super_library = "test.genlib\n"
+                                  "3\n"
+                                  "2\n"
+                                  "6\n"
+                                  "* nand2 1 0\n"
+                                  "inv1 3\n"
+                                  "* nand2 2 4\n"
+                                  "\0";
+
+TEST_CASE( "Emap on MAJ3", "[emap]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto f = aig.create_maj( a, b, c );
+  aig.create_po( f );
+
+  emap_params ps;
+  emap_stats st;
+  binding_view<klut_network> luts = emap( aig, lib, ps, &st );
+
+  CHECK( luts.size() == 6u );
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 1u );
+  CHECK( luts.num_gates() == 1u );
+  CHECK( st.area == 3.0f );
+  CHECK( st.delay == 2.0f );
+}
+
+TEST_CASE( "Emap on bad MAJ3 and constant output", "[emap]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto f = aig.create_maj( a, aig.create_maj( a, b, c ), c );
+  aig.create_po( f );
+  aig.create_po( aig.get_constant( true ) );
+
+  emap_params ps;
+  emap_stats st;
+  binding_view<klut_network> luts = emap( aig, lib, ps, &st );
+
+  CHECK( luts.size() == 6u );
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 2u );
+  CHECK( luts.num_gates() == 1u );
+  CHECK( st.area == 3.0f );
+  CHECK( st.delay == 2.0f );
+}
+
+TEST_CASE( "Emap on full adder 1", "[emap]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto [sum, carry] = full_adder( aig, a, b, c );
+  aig.create_po( sum );
+  aig.create_po( carry );
+
+  emap_params ps;
+  emap_stats st;
+  binding_view<klut_network> luts = emap( aig, lib, ps, &st );
+
+  const float eps{ 0.005f };
+
+  CHECK( luts.size() == 8u );
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 2u );
+  CHECK( luts.num_gates() == 3u );
+  CHECK( st.area > 13.0f - eps );
+  CHECK( st.area < 13.0f + eps );
+  CHECK( st.delay > 3.8f - eps );
+  CHECK( st.delay < 3.8f + eps );
+}
+
+TEST_CASE( "Emap on full adder 2", "[emap]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3, classification_type::p_configurations> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto [sum, carry] = full_adder( aig, a, b, c );
+  aig.create_po( sum );
+  aig.create_po( carry );
+
+  emap_params ps;
+  ps.cut_enumeration_ps.minimize_truth_table = false;
+  ps.use_fast_area_recovery = false;
+  ps.ela_rounds = 0;
+  ps.eswp_rounds = 2;
+  emap_stats st;
+  binding_view<klut_network> luts = emap( aig, lib, ps, &st );
+
+  const float eps{ 0.005f };
+
+  CHECK( luts.size() == 8u );
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 2u );
+  CHECK( luts.num_gates() == 3u );
+  CHECK( st.area > 13.0f - eps );
+  CHECK( st.area < 13.0f + eps );
+  CHECK( st.delay > 3.8f - eps );
+  CHECK( st.delay < 3.8f + eps );
+}
+
+TEST_CASE( "Emap on full adder 1 with cells", "[emap]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto [sum, carry] = full_adder( aig, a, b, c );
+  aig.create_po( sum );
+  aig.create_po( carry );
+
+  emap_params ps;
+  emap_stats st;
+  cell_view<block_network> luts = emap_block( aig, lib, ps, &st );
+
+  const float eps{ 0.005f };
+
+  CHECK( luts.size() == 8u );
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 2u );
+  CHECK( luts.num_gates() == 3u );
+  CHECK( st.area > 13.0f - eps );
+  CHECK( st.area < 13.0f + eps );
+  CHECK( st.delay > 3.8f - eps );
+  CHECK( st.delay < 3.8f + eps );
+}
+
+TEST_CASE( "Emap on full adder 2 with cells", "[emap]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3, classification_type::p_configurations> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto [sum, carry] = full_adder( aig, a, b, c );
+  aig.create_po( sum );
+  aig.create_po( carry );
+
+  emap_params ps;
+  ps.cut_enumeration_ps.minimize_truth_table = false;
+  ps.use_fast_area_recovery = false;
+  ps.ela_rounds = 0;
+  ps.eswp_rounds = 2;
+  emap_stats st;
+  cell_view<block_network> luts = emap_block( aig, lib, ps, &st );
+
+  const float eps{ 0.005f };
+
+  CHECK( luts.size() == 8u );
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 2u );
+  CHECK( luts.num_gates() == 3u );
+  CHECK( st.area > 13.0f - eps );
+  CHECK( st.area < 13.0f + eps );
+  CHECK( st.delay > 3.8f - eps );
+  CHECK( st.delay < 3.8f + eps );
+}
+
+TEST_CASE( "Emap on ripple carry adder with multi-output gates", "[emap]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library_params tps;
+  tps.load_multioutput_gates_single = false;
+  tech_library<3, classification_type::p_configurations> lib( gates, tps );
+
+  aig_network aig;
+  
+  std::vector<aig_network::signal> a( 8 ), b( 8 );
+  std::generate( a.begin(), a.end(), [&aig]() { return aig.create_pi(); } );
+  std::generate( b.begin(), b.end(), [&aig]() { return aig.create_pi(); } );
+  auto carry = aig.get_constant( false );
+
+  carry_ripple_adder_inplace( aig, a, b, carry );
+
+  std::for_each( a.begin(), a.end(), [&]( auto f ) { aig.create_po( f ); } );
+  aig.create_po( carry );
+
+  emap_params ps;
+  ps.map_multioutput = true;
+  ps.area_oriented_mapping = true;
+  emap_stats st;
+  binding_view<klut_network> luts = emap( aig, lib, ps, &st );
+
+  const float eps{ 0.005f };
+
+  CHECK( luts.size() == 34u );
+  CHECK( luts.num_pis() == 16u );
+  CHECK( luts.num_pos() == 9u );
+  CHECK( luts.num_gates() == 16u );
+  CHECK( st.area > 47.0f - eps );
+  CHECK( st.area < 47.0f + eps );
+  CHECK( st.delay > 17.3f - eps );
+  CHECK( st.delay < 17.3f + eps );
+  CHECK( st.multioutput_gates == 8 );
+}
+
+TEST_CASE( "Emap on ripple carry adder with multi-output cells", "[emap]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library_params tps;
+  tps.load_multioutput_gates_single = false;
+  tech_library<3, classification_type::p_configurations> lib( gates, tps );
+
+  aig_network aig;
+  
+  std::vector<aig_network::signal> a( 8 ), b( 8 );
+  std::generate( a.begin(), a.end(), [&aig]() { return aig.create_pi(); } );
+  std::generate( b.begin(), b.end(), [&aig]() { return aig.create_pi(); } );
+  auto carry = aig.get_constant( false );
+
+  carry_ripple_adder_inplace( aig, a, b, carry );
+
+  std::for_each( a.begin(), a.end(), [&]( auto f ) { aig.create_po( f ); } );
+  aig.create_po( carry );
+
+  emap_params ps;
+  ps.map_multioutput = true;
+  ps.area_oriented_mapping = true;
+  emap_stats st;
+  cell_view<block_network> luts = emap_block( aig, lib, ps, &st );
+
+  const float eps{ 0.005f };
+
+  CHECK( luts.size() == 26u );
+  CHECK( luts.num_pis() == 16u );
+  CHECK( luts.num_pos() == 9u );
+  CHECK( luts.num_gates() == 8u );
+  CHECK( st.area > 47.0f - eps );
+  CHECK( st.area < 47.0f + eps );
+  CHECK( st.delay > 17.3f - eps );
+  CHECK( st.delay < 17.3f + eps );
+  CHECK( st.multioutput_gates == 8 );
+}
+
+TEST_CASE( "Emap on multiplier with multi-output gates", "[emap]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library_params tps;
+  tps.load_multioutput_gates_single = true;
+  tech_library<3> lib( gates, tps );
+
+  aig_network aig;
+
+  std::vector<typename aig_network::signal> a( 8 ), b( 8 );
+  std::generate( a.begin(), a.end(), [&aig]() { return aig.create_pi(); } );
+  std::generate( b.begin(), b.end(), [&aig]() { return aig.create_pi(); } );
+
+  for ( auto const& o : carry_ripple_multiplier( aig, a, b ) )
+  {
+    aig.create_po( o );
+  }
+
+  CHECK( aig.num_pis() == 16 );
+  CHECK( aig.num_pos() == 16 );
+
+  emap_params ps;
+  ps.map_multioutput = true;
+  emap_stats st;
+  binding_view<klut_network> luts = emap( aig, lib, ps, &st );
+
+  const float eps{ 0.005f };
+
+  CHECK( luts.size() == 255u );
+  CHECK( luts.num_pis() == 16u );
+  CHECK( luts.num_pos() == 16u );
+  CHECK( luts.num_gates() == 237u );
+  CHECK( st.area > 631.0f - eps );
+  CHECK( st.area < 631.0f + eps );
+  CHECK( st.delay > 33.60f - eps );
+  CHECK( st.delay < 33.60f + eps );
+  CHECK( st.multioutput_gates == 39 );
+}
+
+TEST_CASE( "Emap with inverters", "[emap]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto f1 = aig.create_and( !a, b );
+  const auto f2 = aig.create_and( f1, !c );
+
+  aig.create_po( f2 );
+
+  emap_params ps;
+  emap_stats st;
+  binding_view<klut_network> luts = emap( aig, lib, ps, &st );
+
+  const float eps{ 0.005f };
+
+  CHECK( luts.size() == 11u );
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 1u );
+  CHECK( luts.num_gates() == 6u );
+  CHECK( st.area > 8.0f - eps );
+  CHECK( st.area < 8.0f + eps );
+  CHECK( st.delay > 4.7f - eps );
+  CHECK( st.delay < 4.7f + eps );
+}
+
+TEST_CASE( "Emap with inverters minimization", "[emap]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto f = aig.create_maj( !a, !b, !c );
+  aig.create_po( f );
+
+  emap_params ps;
+  emap_stats st;
+  binding_view<klut_network> luts = emap( aig, lib, ps, &st );
+
+  const float eps{ 0.005f };
+
+  CHECK( luts.size() == 7u );
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 1u );
+  CHECK( luts.num_gates() == 2u );
+  CHECK( st.area > 4.0f - eps );
+  CHECK( st.area < 4.0f + eps );
+  CHECK( st.delay > 2.9f - eps );
+  CHECK( st.delay < 2.9f + eps );
+}
+
+TEST_CASE( "Emap on buffer and constant outputs", "[emap]" )
+{ // Maps an AIG whose outputs include a constant and an unmodified PI, then checks the mapped netlist statistics.
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3, classification_type::np_configurations> lib( gates );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+  const auto d = aig.create_pi();
+
+  const auto n5 = aig.create_and( a, d );
+  const auto n6 = aig.create_and( a, !c );
+  const auto n7 = aig.create_and( !c, n5 );
+  const auto n8 = aig.create_and( c, n6 ); // c AND ( a AND !c ): functionally constant false
+  const auto n9 = aig.create_and( !n6, n7 );
+  const auto n10 = aig.create_and( n7, n8 );
+  const auto n11 = aig.create_and( a, n10 );
+  const auto n12 = aig.create_and( !d, n11 );
+  const auto n13 = aig.create_and( !d, !n7 );
+  const auto n14 = aig.create_and( !n6, !n7 );
+
+  aig.create_po( aig.get_constant( true ) ); // output driven by a constant
+  aig.create_po( b ); // output driven directly by a primary input
+  aig.create_po( n9 );
+  aig.create_po( n12 );
+  aig.create_po( !n13 );
+  aig.create_po( n14 );
+
+  emap_params ps;
+  emap_stats st;
+  binding_view<klut_network> luts = emap( aig, lib, ps, &st );
+
+  const float eps{ 0.005f }; // tolerance for the floating-point area/delay checks
+
+  CHECK( luts.size() == 10u ); // 4 PIs + 4 gates + 2 further nodes (presumably the constants)
+  CHECK( luts.num_pis() == 4u );
+  CHECK( luts.num_pos() == 6u );
+  CHECK( luts.num_gates() == 4u );
+  CHECK( st.area > 7.0f - eps );
+  CHECK( st.area < 7.0f + eps );
+  CHECK( st.delay > 1.9f - eps );
+  CHECK( st.delay < 1.9f + eps );
+}
+
+TEST_CASE( "Emap with supergates", "[emap]" )
+{ // Maps a three-AND AIG with a library built from genlib gates plus supergate data; result is a binding_view.
+  std::vector<gate> gates;
+  super_lib super_data;
+
+  std::istringstream in_lib( test_library );
+  auto result = lorina::read_genlib( in_lib, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  std::istringstream in_super( super_library );
+  result = lorina::read_super( in_super, super_reader( super_data ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3, classification_type::p_configurations> lib( gates, super_data );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto n4 = aig.create_and( a, b );
+  const auto n5 = aig.create_and( b, c );
+  const auto f = aig.create_and( n4, n5 );
+  aig.create_po( f );
+
+  emap_params ps;
+  emap_stats st;
+  binding_view<klut_network> luts = emap( aig, lib, ps, &st );
+
+  const float eps{ 0.005f }; // tolerance for the delay checks
+
+  CHECK( luts.size() == 9u ); // 3 PIs + 4 gates + 2 further nodes (presumably the constants)
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 1u );
+  CHECK( luts.num_gates() == 4u );
+  CHECK( st.area == 6.0f ); // exact float comparison, unlike the eps-bounded delay checks below
+  CHECK( st.delay > 3.8f - eps );
+  CHECK( st.delay < 3.8f + eps );
+}
+
+TEST_CASE( "Emap with supergates 2", "[emap]" )
+{ // Same circuit and library as "Emap with supergates", but mapped with emap_block into a cell_view<block_network>.
+  std::vector<gate> gates;
+  super_lib super_data;
+
+  std::istringstream in_lib( test_library );
+  auto result = lorina::read_genlib( in_lib, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  std::istringstream in_super( super_library );
+  result = lorina::read_super( in_super, super_reader( super_data ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3, classification_type::p_configurations> lib( gates, super_data );
+
+  aig_network aig;
+  const auto a = aig.create_pi();
+  const auto b = aig.create_pi();
+  const auto c = aig.create_pi();
+
+  const auto n4 = aig.create_and( a, b );
+  const auto n5 = aig.create_and( b, c );
+  const auto f = aig.create_and( n4, n5 );
+  aig.create_po( f );
+
+  emap_params ps;
+  emap_stats st;
+  cell_view<block_network> luts = emap_block( aig, lib, ps, &st );
+
+  const float eps{ 0.005f }; // tolerance for the delay checks
+
+  CHECK( luts.size() == 9u ); // 3 PIs + 4 gates + 2 further nodes (presumably the constants)
+  CHECK( luts.num_pis() == 3u );
+  CHECK( luts.num_pos() == 1u );
+  CHECK( luts.num_gates() == 4u );
+  CHECK( st.area == 6.0f ); // exact float comparison, unlike the eps-bounded delay checks below
+  CHECK( st.delay > 3.8f - eps );
+  CHECK( st.delay < 3.8f + eps );
+}
+
+TEST_CASE( "Emap on circuit with don't touch gates", "[emap]" )
+{ // Binds two k-LUT nodes to library gates, marks them don't-touch, and maps the resulting view with emap.
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3, classification_type::np_configurations> lib( gates );
+
+  klut_network klut;
+  const auto a = klut.create_pi();
+  const auto b = klut.create_pi();
+  const auto c = klut.create_pi();
+  const auto d = klut.create_pi();
+
+  const auto n5 = klut.create_xor( c, d );
+  const auto n6 = klut.create_not( n5 );
+  const auto n7 = klut.create_xor( a, b );
+  const auto sum = klut.create_xor( n6, n7 );
+  const auto carry = klut.create_maj( a, b, n5 );
+
+  klut.create_po( sum );
+  klut.create_po( carry );
+
+  binding_view<klut_network> b_klut{ klut, gates };
+  dont_touch_view<binding_view<klut_network>> db_klut{ b_klut };
+
+  db_klut.add_binding( klut.get_node( n5 ), 3 ); // presumably gate index 3 of `gates`; must implement the XOR of n5
+  db_klut.select_dont_touch( klut.get_node( n5 ) );
+  db_klut.add_binding( klut.get_node( n6 ), 0 ); // presumably gate index 0 of `gates`; must implement the NOT of n6
+  db_klut.select_dont_touch( klut.get_node( n6 ) );
+
+  emap_params ps;
+  ps.map_multioutput = true;
+  ps.area_oriented_mapping = true;
+  emap_stats st;
+  binding_view<klut_network> luts = emap( db_klut, lib, ps, &st ); // map the don't-touch view; the bare klut carries no bindings or marks
+
+  const float eps{ 0.005f }; // tolerance for the floating-point area/delay checks
+
+  CHECK( luts.size() == 10u ); // 4 PIs + 4 gates + 2 further nodes (presumably the constants)
+  CHECK( luts.num_pis() == 4u );
+  CHECK( luts.num_pos() == 2u );
+  CHECK( luts.num_gates() == 4u );
+  CHECK( st.area > 12.0f - eps );
+  CHECK( st.area < 12.0f + eps );
+  CHECK( st.delay > 5.8f - eps );
+  CHECK( st.delay < 5.8f + eps );
+}
+
+TEST_CASE( "Emap on circuit with don't touch cells", "[emap]" )
+{ // Binds two k-LUT nodes to library gates, marks them don't-touch, and maps the resulting view with emap_block.
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  tech_library<3, classification_type::np_configurations> lib( gates );
+
+  klut_network klut;
+  const auto a = klut.create_pi();
+  const auto b = klut.create_pi();
+  const auto c = klut.create_pi();
+  const auto d = klut.create_pi();
+
+  const auto n5 = klut.create_xor( c, d );
+  const auto n6 = klut.create_not( n5 );
+  const auto n7 = klut.create_xor( a, b );
+  const auto sum = klut.create_xor( n6, n7 );
+  const auto carry = klut.create_maj( a, b, n5 );
+
+  klut.create_po( sum );
+  klut.create_po( carry );
+
+  binding_view<klut_network> b_klut{ klut, gates };
+  dont_touch_view<binding_view<klut_network>> db_klut{ b_klut };
+
+  db_klut.add_binding( klut.get_node( n5 ), 3 ); // presumably gate index 3 of `gates`; must implement the XOR of n5
+  db_klut.select_dont_touch( klut.get_node( n5 ) );
+  db_klut.add_binding( klut.get_node( n6 ), 0 ); // presumably gate index 0 of `gates`; must implement the NOT of n6
+  db_klut.select_dont_touch( klut.get_node( n6 ) );
+
+  emap_params ps;
+  ps.map_multioutput = true;
+  ps.area_oriented_mapping = true;
+  emap_stats st;
+  cell_view<block_network> luts = emap_block( db_klut, lib, ps, &st ); // map the don't-touch view; the bare klut carries no bindings or marks
+
+  const float eps{ 0.005f }; // tolerance for the floating-point area/delay checks
+
+  CHECK( luts.size() == 9u ); // 4 PIs + 3 gates + 2 further nodes (presumably the constants)
+  CHECK( luts.num_pis() == 4u );
+  CHECK( luts.num_pos() == 2u );
+  CHECK( luts.num_gates() == 3u ); // one gate fewer than the binding_view variant of this test
+  CHECK( st.area > 12.0f - eps );
+  CHECK( st.area < 12.0f + eps );
+  CHECK( st.delay > 5.8f - eps );
+  CHECK( st.delay < 5.8f + eps );
+}
\ No newline at end of file
diff --git a/test/algorithms/extract_adders.cpp b/test/algorithms/extract_adders.cpp
new file mode 100644
index 000000000..ea71ebf4c
--- /dev/null
+++ b/test/algorithms/extract_adders.cpp
@@ -0,0 +1,146 @@
+#include <catch.hpp>
+
+#include <cstdint>
+#include <vector>
+
+#include <mockturtle/algorithms/extract_adders.hpp>
+#include <mockturtle/generators/arithmetic.hpp>
+#include <mockturtle/networks/aig.hpp>
+#include <mockturtle/networks/block.hpp>
+
+using namespace mockturtle;
+
+TEST_CASE( "Map Adders on AIG with no adders", "[extract_adders]" )
+{ // A single majority output: expects one MAJ3 to be counted but no half or full adder to be mapped.
+  aig_network ntk;
+  const auto x = ntk.create_pi();
+  const auto y = ntk.create_pi();
+  const auto z = ntk.create_pi();
+
+  const auto maj = ntk.create_maj( x, y, z );
+  ntk.create_po( maj );
+
+  extract_adders_params params;
+  extract_adders_stats stats;
+  block_network res = extract_adders( ntk, params, &stats );
+
+  CHECK( res.size() == 9u ); // 3 PIs + 4 gates + 2 further nodes
+  CHECK( res.num_pis() == 3u );
+  CHECK( res.num_pos() == 1u );
+  CHECK( res.num_gates() == 4u );
+  CHECK( stats.maj3 == 1u );
+  CHECK( stats.mapped_fa + stats.mapped_ha == 0u );
+}
+
+TEST_CASE( "Map Adders on full adder 1", "[extract_adders]" )
+{ // A generated full adder with default parameters: expects exactly one mapped full-adder gate.
+  aig_network ntk;
+  const auto x = ntk.create_pi();
+  const auto y = ntk.create_pi();
+  const auto z = ntk.create_pi();
+
+  const auto [s, cout] = full_adder( ntk, x, y, z );
+  ntk.create_po( s );
+  ntk.create_po( cout );
+
+  extract_adders_params params;
+  extract_adders_stats stats;
+  block_network res = extract_adders( ntk, params, &stats );
+
+  CHECK( res.size() == 6u ); // 3 PIs + 1 gate + 2 further nodes
+  CHECK( res.num_pis() == 3u );
+  CHECK( res.num_pos() == 2u );
+  CHECK( res.num_gates() == 1u );
+  CHECK( stats.maj3 == 1u );
+  CHECK( stats.xor3 == 1u );
+  CHECK( stats.mapped_ha == 0u );
+  CHECK( stats.mapped_fa == 1u );
+}
+
+TEST_CASE( "Map Adders on full adder 2", "[extract_adders]" )
+{ // Same full adder as test 1, with map_inverted enabled: the expected statistics are unchanged.
+  aig_network ntk;
+  const auto x = ntk.create_pi();
+  const auto y = ntk.create_pi();
+  const auto z = ntk.create_pi();
+
+  const auto [s, cout] = full_adder( ntk, x, y, z );
+  ntk.create_po( s );
+  ntk.create_po( cout );
+
+  extract_adders_params params;
+  params.map_inverted = true;
+  extract_adders_stats stats;
+  block_network res = extract_adders( ntk, params, &stats );
+
+  CHECK( res.size() == 6u ); // 3 PIs + 1 gate + 2 further nodes
+  CHECK( res.num_pis() == 3u );
+  CHECK( res.num_pos() == 2u );
+  CHECK( res.num_gates() == 1u );
+  CHECK( stats.maj3 == 1u );
+  CHECK( stats.xor3 == 1u );
+  CHECK( stats.mapped_ha == 0u );
+  CHECK( stats.mapped_fa == 1u );
+}
+
+TEST_CASE( "Map adders on ripple carry adder", "[extract_adders]" )
+{ // 8-bit ripple-carry adder with a constant-false carry-in: expects 1 half adder and 7 full adders.
+  aig_network ntk;
+
+  std::vector<aig_network::signal> lhs( 8 ), rhs( 8 );
+  std::generate( lhs.begin(), lhs.end(), [&ntk]() { return ntk.create_pi(); } );
+  std::generate( rhs.begin(), rhs.end(), [&ntk]() { return ntk.create_pi(); } );
+  auto carry_io = ntk.get_constant( false ); // carry-in going in; output as the final PO after the call
+
+  carry_ripple_adder_inplace( ntk, lhs, rhs, carry_io );
+
+  std::for_each( lhs.begin(), lhs.end(), [&]( auto bit ) { ntk.create_po( bit ); } );
+  ntk.create_po( carry_io );
+
+  extract_adders_params params;
+  extract_adders_stats stats;
+  block_network res = extract_adders( ntk, params, &stats );
+
+  CHECK( res.size() == 26u ); // 16 PIs + 8 gates + 2 further nodes
+  CHECK( res.num_pis() == 16u );
+  CHECK( res.num_pos() == 9u );
+  CHECK( res.num_gates() == 8u );
+  CHECK( stats.and2 == 52u );
+  CHECK( stats.xor2 == 15u );
+  CHECK( stats.maj3 == 7u );
+  CHECK( stats.xor3 == 7u );
+  CHECK( stats.mapped_ha == 1u );
+  CHECK( stats.mapped_fa == 7u );
+}
+
+TEST_CASE( "Map adders on multiplier", "[extract_adders]" )
+{ // Builds an 8x8 carry_ripple_multiplier in an AIG and checks the adder-extraction statistics.
+  aig_network aig;
+
+  std::vector<typename aig_network::signal> a( 8 ), b( 8 );
+  std::generate( a.begin(), a.end(), [&aig]() { return aig.create_pi(); } );
+  std::generate( b.begin(), b.end(), [&aig]() { return aig.create_pi(); } );
+
+  for ( auto const& o : carry_ripple_multiplier( aig, a, b ) )
+  {
+    aig.create_po( o );
+  }
+
+  CHECK( aig.num_pis() == 16 ); // sanity-check the generated circuit before extraction
+  CHECK( aig.num_pos() == 16 );
+
+  extract_adders_params ps;
+  extract_adders_stats st;
+  block_network luts = extract_adders( aig, ps, &st );
+
+  CHECK( luts.size() == 138u ); // 16 PIs + 120 gates + 2 further nodes
+  CHECK( luts.num_pis() == 16u );
+  CHECK( luts.num_pos() == 16u );
+  CHECK( luts.num_gates() == 120u );
+  CHECK( st.and2 == 424u );
+  CHECK( st.xor2 == 104u );
+  CHECK( st.maj3 == 48u );
+  CHECK( st.xor3 == 90u );
+  CHECK( st.mapped_ha == 8u );
+  CHECK( st.mapped_fa == 48u );
+}
diff --git a/test/networks/block.cpp b/test/networks/block.cpp
new file mode 100644
index 000000000..05cbe7d2f
--- /dev/null
+++ b/test/networks/block.cpp
@@ -0,0 +1,437 @@
+#include <catch.hpp>
+
+#include <vector>
+
+#include <mockturtle/networks/block.hpp>
+
+#include <kitty/constructors.hpp>
+#include <kitty/dynamic_truth_table.hpp>
+#include <kitty/operations.hpp>
+#include <kitty/operators.hpp>
+
+using namespace mockturtle;
+
+TEST_CASE( "create and use constants in a block network", "[block_net]" )
+{ // A fresh block network holds exactly two constant nodes (0 and 1); neither is complemented nor a PI.
+  block_network block_net;
+
+  CHECK( has_size_v<block_network> );
+  CHECK( has_get_constant_v<block_network> );
+  CHECK( has_is_constant_v<block_network> );
+  CHECK( has_is_pi_v<block_network> );
+  CHECK( has_is_constant_v<block_network> );
+  CHECK( has_get_node_v<block_network> );
+  CHECK( has_is_complemented_v<block_network> );
+
+  CHECK( block_net.size() == 2 );
+
+  auto c0 = block_net.get_constant( false );
+  auto c1 = block_net.get_constant( true );
+
+  CHECK( block_net.size() == 2 ); // fetching the constants must not create nodes
+  CHECK( c0 != c1 );
+  CHECK( block_net.get_node( c0 ) == 0 );
+  CHECK( block_net.get_node( c1 ) == 1 );
+  CHECK( !block_net.is_complemented( c0 ) );
+  CHECK( !block_net.is_complemented( c1 ) );
+  CHECK( block_net.is_constant( block_net.get_node( c0 ) ) );
+  CHECK( block_net.is_constant( block_net.get_node( c1 ) ) );
+  CHECK( !block_net.is_pi( block_net.get_node( c0 ) ) ); // was a duplicate check of c1; c0 was never covered
+  CHECK( !block_net.is_pi( block_net.get_node( c1 ) ) );
+}
+
+TEST_CASE( "create and use primary inputs in a block network", "[block_net]" )
+{ // Creating two PIs adds two nodes to the initial two and yields distinct signals.
+  block_network ntk;
+
+  CHECK( has_create_pi_v<block_network> );
+  CHECK( has_is_constant_v<block_network> );
+  CHECK( has_is_pi_v<block_network> );
+  CHECK( has_num_pis_v<block_network> );
+
+  CHECK( ntk.num_pis() == 0 );
+
+  auto pi_a = ntk.create_pi();
+  auto pi_b = ntk.create_pi();
+
+  CHECK( ntk.size() == 4 );
+  CHECK( ntk.num_pis() == 2 );
+  CHECK( pi_a != pi_b );
+}
+
+TEST_CASE( "create and use primary outputs in a block network", "[block_net]" )
+{ // Outputs may be driven by constants or PIs; creating POs does not add nodes.
+  block_network ntk;
+
+  CHECK( has_create_po_v<block_network> );
+  CHECK( has_num_pos_v<block_network> );
+
+  auto zero = ntk.get_constant( false );
+  auto one = ntk.get_constant( true );
+  auto pi = ntk.create_pi();
+
+  ntk.create_po( zero );
+  ntk.create_po( one );
+  ntk.create_po( pi );
+
+  CHECK( ntk.size() == 3 ); // two constant nodes + one PI
+  CHECK( ntk.num_pis() == 1 );
+  CHECK( ntk.num_pos() == 3 );
+}
+
+TEST_CASE( "create unary operations in a block network", "[block_net]" )
+{ // create_buf returns its input signal unchanged; create_not yields a different signal.
+  block_network ntk;
+
+  CHECK( has_create_buf_v<block_network> );
+  CHECK( has_create_not_v<block_network> );
+
+  auto pi = ntk.create_pi();
+
+  CHECK( ntk.size() == 3 );
+
+  auto buffered = ntk.create_buf( pi );
+  auto inverted = ntk.create_not( pi );
+
+  CHECK( ntk.size() == 4 ); // buf + not together added exactly one node
+  CHECK( buffered == pi );
+  CHECK( inverted != pi );
+}
+
+TEST_CASE( "create binary operations in a block network", "[block_net]" )
+{ // Every create_and call adds a node, even for repeated or swapped operands.
+  block_network ntk;
+
+  CHECK( has_create_and_v<block_network> );
+
+  const auto lhs = ntk.create_pi();
+  const auto rhs = ntk.create_pi();
+
+  CHECK( ntk.size() == 4 );
+
+  ntk.create_and( lhs, rhs );
+  CHECK( ntk.size() == 5 );
+
+  ntk.create_and( lhs, rhs ); // identical operands: still a new node
+  CHECK( ntk.size() == 6 );
+
+  ntk.create_and( rhs, lhs ); // swapped operands: still a new node
+  CHECK( ntk.size() == 7 );
+}
+
+TEST_CASE( "create multi-output operations in a block network", "[block_net]" )
+{ // A half adder and a full adder each occupy a single node.
+  block_network ntk;
+
+  const auto in0 = ntk.create_pi();
+  const auto in1 = ntk.create_pi();
+  const auto in2 = ntk.create_pi();
+
+  CHECK( ntk.size() == 5 );
+
+  ntk.create_ha( in0, in1 );
+  CHECK( ntk.size() == 6 );
+
+  ntk.create_fa( in0, in1, in2 );
+  CHECK( ntk.size() == 7 );
+}
+
+TEST_CASE( "clone a block network", "[block_net]" )
+{ // Plain copy (ntk2) is expected to alias ntk1; clone() (ntk3) must stay a snapshot of ntk1 at clone time.
+  CHECK( has_clone_v<block_network> );
+
+  block_network ntk1;
+  auto a = ntk1.create_pi();
+  auto b = ntk1.create_pi();
+  auto f1 = ntk1.create_and( a, b );
+  auto f2 = ntk1.create_ha( a, b );
+  ntk1.create_po( f1 );
+  ntk1.create_po( f2 );
+  ntk1.create_po( ntk1.next_output_pin( f2 ) );
+  CHECK( ntk1.size() == 6 );
+  CHECK( ntk1.num_gates() == 2 );
+  CHECK( ntk1.num_pos() == 3 );
+
+  auto ntk2 = ntk1;
+  auto ntk3 = ntk1.clone();
+
+  auto c = ntk2.create_pi();
+  auto f3 = ntk2.create_or( f2, c );
+  ntk2.create_po( f3 );
+  CHECK( ntk1.size() == 8 ); // edits made through ntk2 are visible through ntk1
+  CHECK( ntk1.num_gates() == 3 );
+  CHECK( ntk1.num_pos() == 4 );
+
+  CHECK( ntk3.size() == 6 ); // the clone is never modified, so it keeps the pre-edit counts
+  CHECK( ntk3.num_gates() == 2 );
+  CHECK( ntk3.num_pos() == 3 );
+}
+
+TEST_CASE( "clone a node in a block network", "[block_net]" )
+{ // Clones a single-output AND and a two-output half adder from one network into another.
+  block_network block_net1, block_net2;
+
+  CHECK( has_clone_node_v<block_network> );
+
+  auto a1 = block_net1.create_pi();
+  auto b1 = block_net1.create_pi();
+  auto f1 = block_net1.create_and( a1, b1 );
+  auto f2 = block_net1.create_ha( a1, b1 );
+  CHECK( block_net1.size() == 6 );
+
+  auto a2 = block_net2.create_pi();
+  auto b2 = block_net2.create_pi();
+  CHECK( block_net2.size() == 4 );
+
+  auto f3 = block_net2.clone_node( block_net1, block_net1.get_node( f1 ), { a2, b2 } );
+  CHECK( block_net2.size() == 5 );
+  CHECK( block_net2.num_outputs( block_net2.get_node( f3 ) ) == 1 );
+
+  auto f4 = block_net2.clone_node( block_net1, block_net1.get_node( f2 ), { a2, b2 } );
+  CHECK( block_net2.size() == 6 );
+  CHECK( block_net2.num_outputs( block_net2.get_node( f4 ) ) == 2 ); // the output count carries over to the clone
+
+  block_net2.foreach_fanin( block_net2.get_node( f3 ), [&]( auto const& s ) {
+    CHECK( !block_net2.is_complemented( s ) );
+  } );
+
+  block_net2.foreach_fanin( block_net2.get_node( f4 ), [&]( auto const& s ) {
+    CHECK( !block_net2.is_complemented( s ) );
+  } );
+}
+
+TEST_CASE( "No hash nodes in block network", "[block_net]" )
+{ // Creating a node with the same fanins and function twice must produce two nodes.
+  block_network ntk;
+
+  const auto x = ntk.create_pi();
+  const auto y = ntk.create_pi();
+  const auto z = ntk.create_pi();
+
+  kitty::dynamic_truth_table maj_tt( 3u ), xor_tt( 3u );
+  kitty::create_from_hex_string( maj_tt, "e8" );
+  kitty::create_from_hex_string( xor_tt, "96" );
+
+  ntk.create_node( { x, y, z }, maj_tt );
+  ntk.create_node( { x, y, z }, xor_tt );
+
+  CHECK( ntk.size() == 7 );
+
+  ntk.create_node( { x, y, z }, maj_tt ); // same function and fanins as the first node
+
+  CHECK( ntk.size() == 8 );
+}
+
+TEST_CASE( "structural properties of a block network", "[block_net]" )
+{ // Checks size, PI/PO/gate counts and fanin/fanout sizes on a network with two AND gates.
+  block_network block_net;
+
+  CHECK( has_size_v<block_network> );
+  CHECK( has_num_pis_v<block_network> );
+  CHECK( has_num_pos_v<block_network> );
+  CHECK( has_num_gates_v<block_network> );
+  CHECK( has_fanin_size_v<block_network> );
+  CHECK( has_fanout_size_v<block_network> );
+
+  const auto x1 = block_net.create_pi();
+  const auto x2 = block_net.create_pi();
+
+  const auto f1 = block_net.create_and( x1, x2 );
+  const auto f2 = block_net.create_and( x2, x1 );
+
+  block_net.create_po( f1 );
+  block_net.create_po( f2 );
+
+  CHECK( block_net.size() == 6 );
+  CHECK( block_net.num_pis() == 2 );
+  CHECK( block_net.num_pos() == 2 );
+  CHECK( block_net.num_gates() == 2 );
+  CHECK( block_net.fanin_size( block_net.get_node( x1 ) ) == 0 );
+  CHECK( block_net.fanin_size( block_net.get_node( x2 ) ) == 0 );
+  CHECK( block_net.fanin_size( block_net.get_node( f1 ) ) == 2 );
+  CHECK( block_net.fanin_size( block_net.get_node( f2 ) ) == 2 );
+  CHECK( block_net.fanout_size( block_net.get_node( x1 ) ) == 2 ); // each PI feeds both AND gates
+  CHECK( block_net.fanout_size( block_net.get_node( x2 ) ) == 2 );
+  CHECK( block_net.fanout_size( block_net.get_node( f1 ) ) == 1 ); // each AND gate drives exactly one PO
+  CHECK( block_net.fanout_size( block_net.get_node( f2 ) ) == 1 );
+}
+
+TEST_CASE( "node and signal iteration in a block network", "[block_net]" )
+{ // Exercises foreach_node/foreach_pi/foreach_po with and without an index, and with early termination.
+  block_network block_net;
+
+  CHECK( has_foreach_node_v<block_network> );
+  CHECK( has_foreach_pi_v<block_network> );
+  CHECK( has_foreach_po_v<block_network> );
+  CHECK( has_foreach_fanin_v<block_network> );
+
+  const auto x1 = block_net.create_pi();
+  const auto x2 = block_net.create_pi();
+  const auto f1 = block_net.create_ha( x1, x2 );
+  const auto f2 = block_net.create_and( x2, x1 );
+  block_net.create_po( f1 );
+  block_net.create_po( f2 );
+
+  CHECK( block_net.size() == 6 );
+
+  /* iterate over nodes */
+  uint32_t mask{ 0 }, counter{ 0 }; // mask: one bit per visited node id; counter: sum of callback indices
+  block_net.foreach_node( [&]( auto n, auto i ) { mask |= ( 1 << n ); counter += i; } );
+  CHECK( mask == 63 ); // bits 0..5: all six nodes visited
+  CHECK( counter == 15 ); // 0 + 1 + 2 + 3 + 4 + 5
+
+  mask = 0;
+  block_net.foreach_node( [&]( auto n ) { mask |= ( 1 << n ); } );
+  CHECK( mask == 63 );
+
+  mask = counter = 0;
+  block_net.foreach_node( [&]( auto n, auto i ) { mask |= ( 1 << n ); counter += i; return false; } );
+  CHECK( mask == 1 ); // returning false stops after the first node
+  CHECK( counter == 0 );
+
+  mask = 0;
+  block_net.foreach_node( [&]( auto n ) { mask |= ( 1 << n ); return false; } );
+  CHECK( mask == 1 );
+
+  /* iterate over PIs */
+  mask = counter = 0;
+  block_net.foreach_pi( [&]( auto n, auto i ) { mask |= ( 1 << n ); counter += i; } );
+  CHECK( mask == 12 ); // bits 2 and 3: the two PIs
+  CHECK( counter == 1 );
+
+  mask = 0;
+  block_net.foreach_pi( [&]( auto n ) { mask |= ( 1 << n ); } );
+  CHECK( mask == 12 );
+
+  mask = counter = 0;
+  block_net.foreach_pi( [&]( auto n, auto i ) { mask |= ( 1 << n ); counter += i; return false; } );
+  CHECK( mask == 4 ); // only the first PI (node 2) is visited
+  CHECK( counter == 0 );
+
+  mask = 0;
+  block_net.foreach_pi( [&]( auto n ) { mask |= ( 1 << n ); return false; } );
+  CHECK( mask == 4 );
+
+  /* iterate over POs */
+  mask = counter = 0;
+  block_net.foreach_po( [&]( auto s, auto i ) { mask |= ( 1 << block_net.get_node( s ) ); counter += i; } );
+  CHECK( mask == 48 ); // bits 4 and 5: the nodes driving the two POs
+  CHECK( counter == 1 );
+
+  mask = 0;
+  block_net.foreach_po( [&]( auto s ) { mask |= ( 1 << block_net.get_node( s ) ); } );
+  CHECK( mask == 48 );
+
+  mask = counter = 0;
+  block_net.foreach_po( [&]( auto s, auto i ) { mask |= ( 1 << block_net.get_node( s ) ); counter += i; return false; } );
+  CHECK( mask == 16 ); // only the first PO (driven by node 4) is visited
+  CHECK( counter == 0 );
+
+  mask = 0;
+  block_net.foreach_po( [&]( auto s ) { mask |= ( 1 << block_net.get_node( s ) ); return false; } );
+  CHECK( mask == 16 );
+}
+
+TEST_CASE( "custom node values in block networks", "[block_net]" )
+{ // Exercises the per-node value API: clear_values, set_value, value, incr_value and decr_value.
+  block_network block_net;
+
+  CHECK( has_clear_values_v<block_network> );
+  CHECK( has_value_v<block_network> );
+  CHECK( has_set_value_v<block_network> );
+  CHECK( has_incr_value_v<block_network> );
+  CHECK( has_decr_value_v<block_network> );
+
+  const auto x1 = block_net.create_pi();
+  const auto x2 = block_net.create_pi();
+  const auto f1 = block_net.create_and( x1, x2 );
+  const auto f2 = block_net.create_and( x2, x1 );
+  block_net.create_po( f1 );
+  block_net.create_po( f2 );
+
+  CHECK( block_net.size() == 6 );
+
+  block_net.clear_values();
+  block_net.foreach_node( [&]( auto n ) {
+    CHECK( block_net.value( n ) == 0 );
+    block_net.set_value( n, static_cast<uint32_t>( n ) );
+    CHECK( block_net.value( n ) == n );
+    CHECK( block_net.incr_value( n ) == n ); // returns the value before the increment
+    CHECK( block_net.value( n ) == n + 1 );
+    CHECK( block_net.decr_value( n ) == n ); // returns the value after the decrement
+    CHECK( block_net.value( n ) == n );
+  } );
+  block_net.clear_values();
+  block_net.foreach_node( [&]( auto n ) {
+    CHECK( block_net.value( n ) == 0 );
+  } );
+}
+
+TEST_CASE( "visited values in block networks", "[block_net]" )
+{ // Exercises the per-node visited API: clear_visited, set_visited and visited.
+  block_network block_net;
+
+  CHECK( has_clear_visited_v<block_network> );
+  CHECK( has_visited_v<block_network> );
+  CHECK( has_set_visited_v<block_network> );
+
+  const auto x1 = block_net.create_pi();
+  const auto x2 = block_net.create_pi();
+  const auto f1 = block_net.create_and( x1, x2 );
+  const auto f2 = block_net.create_and( x2, x1 );
+  block_net.create_po( f1 );
+  block_net.create_po( f2 );
+
+  CHECK( block_net.size() == 6 );
+
+  block_net.clear_visited();
+  block_net.foreach_node( [&]( auto n ) {
+    CHECK( block_net.visited( n ) == 0 );
+    block_net.set_visited( n, static_cast<uint32_t>( n ) ); // store the node id as the visited mark
+    CHECK( block_net.visited( n ) == n );
+  } );
+  block_net.clear_visited();
+  block_net.foreach_node( [&]( auto n ) {
+    CHECK( block_net.visited( n ) == 0 ); // clear_visited resets every mark
+  } );
+}
+
+TEST_CASE( "Multi-output functions in block networks", "[block_net]" )
+{ // Checks output-pin navigation, output counts and per-pin functions of half/full adder nodes.
+  block_network block_net;
+
+  CHECK( has_clear_visited_v<block_network> ); // NOTE(review): the visited traits are not otherwise used in this test
+  CHECK( has_visited_v<block_network> );
+  CHECK( has_set_visited_v<block_network> );
+
+  const auto x1 = block_net.create_pi();
+  const auto x2 = block_net.create_pi();
+  const auto x3 = block_net.create_pi();
+  const auto f1 = block_net.create_ha( x1, x2 );
+  const auto f2 = block_net.create_fa( x1, x2, x3 );
+  block_net.create_po( f1 );
+  block_net.create_po( block_net.next_output_pin( f1 ) );
+  block_net.create_po( f2 );
+  block_net.create_po( block_net.next_output_pin( f2 ) );
+
+  CHECK( block_net.size() == 7 );
+
+  CHECK( block_net.get_node( f1 ) == 5 );
+  CHECK( block_net.get_node( block_net.next_output_pin( f1 ) ) == 5 ); // both pins belong to the same node
+  CHECK( block_net.get_node( f2 ) == 6 );
+  CHECK( block_net.get_node( block_net.next_output_pin( f2 ) ) == 6 );
+  CHECK( block_net.num_outputs( 5 ) == 2 );
+  CHECK( block_net.num_outputs( 6 ) == 2 );
+  CHECK( block_net.is_multioutput( 5 ) == true );
+  CHECK( block_net.is_multioutput( 6 ) == true );
+  CHECK( block_net.node_function_pin( 5, 0 )._bits[0] == 0x8 ); // half adder pin 0: 2-input AND truth table
+  CHECK( block_net.node_function_pin( 5, 1 )._bits[0] == 0x6 ); // half adder pin 1: 2-input XOR truth table
+  CHECK( block_net.node_function_pin( 6, 0 )._bits[0] == 0xe8 ); // full adder pin 0: 3-input majority truth table
+  CHECK( block_net.node_function_pin( 6, 1 )._bits[0] == 0x96 ); // full adder pin 1: 3-input XOR truth table
+
+  const auto f3 = block_net.create_and( x1, x2 );
+  CHECK( block_net.get_node( f3 ) == 7 );
+  CHECK( block_net.num_outputs( 7 ) == 1 );
+  CHECK( block_net.is_multioutput( 7 ) == false );
+  CHECK( block_net.node_function_pin( 7, 0 )._bits[0] == 0x8 );
+}
diff --git a/test/utils/struct_library.cpp b/test/utils/struct_library.cpp
new file mode 100644
index 000000000..9bcbc39d6
--- /dev/null
+++ b/test/utils/struct_library.cpp
@@ -0,0 +1,220 @@
+#include <catch.hpp>
+
+#include <algorithm>
+#include <cstdint>
+#include <vector>
+
+#include <lorina/genlib.hpp>
+#include <lorina/super.hpp>
+#include <mockturtle/io/genlib_reader.hpp>
+#include <mockturtle/io/super_reader.hpp>
+#include <mockturtle/utils/struct_library.hpp>
+#include <kitty/constructors.hpp>
+#include <kitty/dynamic_truth_table.hpp>
+#include <kitty/npn.hpp>
+#include <kitty/static_truth_table.hpp>
+
+using namespace mockturtle;
+
+std::string const test_library = "GATE   inv1    3 O=!a;               PIN * INV 3 999 1.1 0.09 1.1 0.09\n" // inverters plus 2- to 4-input NAND/NOR gates and constants
+                                 "GATE   inv2    2 O=!a;               PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                 "GATE   inv3    1 O=!a;               PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                 "GATE   inv4    4 O=!a;               PIN * INV 4 999 1.2 0.07 1.2 0.07\n"
+                                 "GATE   nand2   2 O=!(a*b);           PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                 "GATE   nand3   3 O=!(a*b*c);         PIN * INV 1 999 1.1 0.3 1.1 0.3\n"
+                                 "GATE   nand4   4 O=!(a*b*c*d);       PIN * INV 1 999 1.4 0.4 1.4 0.4\n"
+                                 "GATE   nor2    2 O=!(a+b);           PIN * INV 1 999 1.4 0.5 1.4 0.5\n"
+                                 "GATE   nor3    3 O=!(a+b+c);         PIN * INV 1 999 2.4 0.7 2.4 0.7\n"
+                                 "GATE   nor4    4 O=!(a+b+c+d);       PIN * INV 1 999 3.8 1.0 3.8 1.0\n"
+                                 "GATE   zero    0 O=CONST0;\n"
+                                 "GATE   one     0 O=CONST1;";
+
+std::string const reconv_library = "GATE   inv1    3 O=!a;               PIN * INV 3 999 1.1 0.09 1.1 0.09\n" // adds xor2 and maj, whose expressions repeat input variables
+                                   "GATE   nand2   2 O=!(a*b);           PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                   "GATE   xor2    5 O=a*!b+!a*b;        PIN * UNKNOWN 2 999 1.9 0.5 1.9 0.5\n"
+                                   "GATE   maj     6 O=a*b+a*c+b*c;      PIN * INV 1 999 2.1 0.4 2.1 0.4\n"
+                                   "GATE   zero    0 O=CONST0;\n"
+                                   "GATE   one     0 O=CONST1;";
+
+std::string const large_library = "GATE   inv1    3 O=!a;                      PIN * INV 3 999 1.1 0.09 1.1 0.09\n" // a single 7-input gate (oai322) besides the inverter and constants
+                                  "GATE   oai322  8 O=!((a+b+c)*(d+e)*(f+g));  PIN * INV 1 999 3.0 0.4 3.0 0.4\n"
+                                  "GATE   zero    0 O=CONST0;\n"
+                                  "GATE   one     0 O=CONST1;";
+
+TEST_CASE( "Struct library creation", "[struct_library]" )
+{ // Builds a struct_library<4> from test_library and checks each entry's gate, area, pin delays and polarity.
+  std::vector<gate> gates;
+
+  std::istringstream in( test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+
+  CHECK( result == lorina::return_code::success );
+
+  struct_library<4> lib( gates );
+  lib.construct( 2, false ); // NOTE(review): the meaning of the ( 2, false ) arguments is not visible in this test
+
+  auto const& library_map = lib.get_struct_library();
+
+  /* translate to sorted vector */
+  std::vector<uint32_t> entry_ids;
+  std::for_each( library_map.begin(), library_map.end(), [&]( auto const& pair ) { entry_ids.push_back( pair.first ); return; } );
+  std::sort( entry_ids.begin(), entry_ids.end() ); // sorted keys give a fixed order for the checks below
+
+  CHECK( entry_ids.size() == 8 );
+
+  CHECK( entry_ids[0] % 2 == 0 ); // sorted keys alternate even/odd; in this test even keys hold NOR gates and odd keys NAND gates
+  CHECK( library_map.find( entry_ids[0] )->second.size() == 1 );
+  CHECK( library_map.find( entry_ids[0] )->second[0].root->root->name == "nor2" );
+  CHECK( library_map.find( entry_ids[0] )->second[0].area == 2 );
+  CHECK( library_map.find( entry_ids[0] )->second[0].tdelay[0] == 1.4f );
+  CHECK( library_map.find( entry_ids[0] )->second[0].tdelay[1] == 1.4f );
+  CHECK( library_map.find( entry_ids[0] )->second[0].tdelay[2] == 0 ); // unused pins carry zero delay
+  CHECK( library_map.find( entry_ids[0] )->second[0].tdelay[3] == 0 );
+  CHECK( library_map.find( entry_ids[0] )->second[0].polarity == 3 );
+
+  CHECK( entry_ids[1] % 2 == 1 );
+  CHECK( library_map.find( entry_ids[1] )->second.size() == 1 );
+  CHECK( library_map.find( entry_ids[1] )->second[0].root->root->name == "nand2" );
+  CHECK( library_map.find( entry_ids[1] )->second[0].area == 2 );
+  CHECK( library_map.find( entry_ids[1] )->second[0].tdelay[0] == 1.0f );
+  CHECK( library_map.find( entry_ids[1] )->second[0].tdelay[1] == 1.0f );
+  CHECK( library_map.find( entry_ids[1] )->second[0].tdelay[2] == 0 );
+  CHECK( library_map.find( entry_ids[1] )->second[0].tdelay[3] == 0 );
+  CHECK( library_map.find( entry_ids[1] )->second[0].polarity == 0 );
+
+  CHECK( entry_ids[2] % 2 == 0 );
+  CHECK( library_map.find( entry_ids[2] )->second.size() == 1 );
+  CHECK( library_map.find( entry_ids[2] )->second[0].root->root->name == "nor3" );
+  CHECK( library_map.find( entry_ids[2] )->second[0].area == 3 );
+  CHECK( library_map.find( entry_ids[2] )->second[0].tdelay[0] == 2.4f );
+  CHECK( library_map.find( entry_ids[2] )->second[0].tdelay[1] == 2.4f );
+  CHECK( library_map.find( entry_ids[2] )->second[0].tdelay[2] == 2.4f );
+  CHECK( library_map.find( entry_ids[2] )->second[0].tdelay[3] == 0 );
+  CHECK( library_map.find( entry_ids[2] )->second[0].polarity == 7 );
+
+  CHECK( entry_ids[3] % 2 == 1 );
+  CHECK( library_map.find( entry_ids[3] )->second.size() == 1 );
+  CHECK( library_map.find( entry_ids[3] )->second[0].root->root->name == "nand3" );
+  CHECK( library_map.find( entry_ids[3] )->second[0].area == 3 );
+  CHECK( library_map.find( entry_ids[3] )->second[0].tdelay[0] == 1.1f );
+  CHECK( library_map.find( entry_ids[3] )->second[0].tdelay[1] == 1.1f );
+  CHECK( library_map.find( entry_ids[3] )->second[0].tdelay[2] == 1.1f );
+  CHECK( library_map.find( entry_ids[3] )->second[0].tdelay[3] == 0 );
+  CHECK( library_map.find( entry_ids[3] )->second[0].polarity == 0 );
+
+  CHECK( entry_ids[4] % 2 == 0 );
+  CHECK( library_map.find( entry_ids[4] )->second.size() == 1 );
+  CHECK( library_map.find( entry_ids[4] )->second[0].root->root->name == "nor4" );
+  CHECK( library_map.find( entry_ids[4] )->second[0].area == 4 );
+  CHECK( library_map.find( entry_ids[4] )->second[0].tdelay[0] == 3.8f );
+  CHECK( library_map.find( entry_ids[4] )->second[0].tdelay[1] == 3.8f );
+  CHECK( library_map.find( entry_ids[4] )->second[0].tdelay[2] == 3.8f );
+  CHECK( library_map.find( entry_ids[4] )->second[0].tdelay[3] == 3.8f );
+  CHECK( library_map.find( entry_ids[4] )->second[0].polarity == 15 );
+
+  CHECK( entry_ids[5] % 2 == 1 );
+  CHECK( library_map.find( entry_ids[5] )->second.size() == 1 );
+  CHECK( library_map.find( entry_ids[5] )->second[0].root->root->name == "nand4" );
+  CHECK( library_map.find( entry_ids[5] )->second[0].area == 4 );
+  CHECK( library_map.find( entry_ids[5] )->second[0].tdelay[0] == 1.4f );
+  CHECK( library_map.find( entry_ids[5] )->second[0].tdelay[1] == 1.4f );
+  CHECK( library_map.find( entry_ids[5] )->second[0].tdelay[2] == 1.4f );
+  CHECK( library_map.find( entry_ids[5] )->second[0].tdelay[3] == 1.4f );
+  CHECK( library_map.find( entry_ids[5] )->second[0].polarity == 0 );
+
+  CHECK( entry_ids[6] % 2 == 0 ); // nor4 appears under a second key (entries 4 and 6)
+  CHECK( library_map.find( entry_ids[6] )->second.size() == 1 );
+  CHECK( library_map.find( entry_ids[6] )->second[0].root->root->name == "nor4" );
+  CHECK( library_map.find( entry_ids[6] )->second[0].area == 4 );
+  CHECK( library_map.find( entry_ids[6] )->second[0].tdelay[0] == 3.8f );
+  CHECK( library_map.find( entry_ids[6] )->second[0].tdelay[1] == 3.8f );
+  CHECK( library_map.find( entry_ids[6] )->second[0].tdelay[2] == 3.8f );
+  CHECK( library_map.find( entry_ids[6] )->second[0].tdelay[3] == 3.8f );
+  CHECK( library_map.find( entry_ids[6] )->second[0].polarity == 15 );
+
+  CHECK( entry_ids[7] % 2 == 1 ); // nand4 appears under a second key (entries 5 and 7)
+  CHECK( library_map.find( entry_ids[7] )->second.size() == 1 );
+  CHECK( library_map.find( entry_ids[7] )->second[0].root->root->name == "nand4" );
+  CHECK( library_map.find( entry_ids[7] )->second[0].area == 4 );
+  CHECK( library_map.find( entry_ids[7] )->second[0].tdelay[0] == 1.4f );
+  CHECK( library_map.find( entry_ids[7] )->second[0].tdelay[1] == 1.4f );
+  CHECK( library_map.find( entry_ids[7] )->second[0].tdelay[2] == 1.4f );
+  CHECK( library_map.find( entry_ids[7] )->second[0].tdelay[3] == 1.4f );
+  CHECK( library_map.find( entry_ids[7] )->second[0].polarity == 0 );
+}
+
+TEST_CASE( "Struct library creation ignore reconvergence", "[struct_library]" )
+{
+  /* expects `reconv_library` to yield exactly one structural rule, rooted at nand2 */
+  std::vector<gate> gates;
+  std::istringstream in( reconv_library );
+  auto const result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  struct_library<3> lib( gates );
+  lib.construct( 2, false );
+  auto const& library_map = lib.get_struct_library();
+
+  /* collect the rule ids and sort them in ascending order */
+  std::vector<uint32_t> entry_ids;
+  for ( auto const& entry : library_map )
+    entry_ids.push_back( entry.first );
+  std::sort( entry_ids.begin(), entry_ids.end() );
+
+  REQUIRE( entry_ids.size() == 1 ); /* abort here: entry_ids[0] is read below */
+  CHECK( entry_ids[0] % 2 == 1 );
+  auto const& rules = library_map.find( entry_ids[0] )->second;
+  REQUIRE( rules.size() == 1 ); /* abort here: rules[0] is read below */
+  CHECK( rules[0].root->root->name == "nand2" );
+  CHECK( rules[0].area == 2 );
+  CHECK( rules[0].tdelay[0] == 1.0f );
+  CHECK( rules[0].tdelay[1] == 1.0f );
+  CHECK( rules[0].tdelay[2] == 0 );
+  CHECK( rules[0].polarity == 0 );
+}
+
+TEST_CASE( "Struct library creation large rules", "[struct_library]" )
+{
+  /* expects `large_library` to yield two structural rules, both rooted at oai322 */
+  std::vector<gate> gates;
+  std::istringstream in( large_library );
+  auto const result = lorina::read_genlib( in, genlib_reader( gates ) );
+  CHECK( result == lorina::return_code::success );
+
+  struct_library<7> lib( gates );
+  lib.construct( 2, false );
+  auto const& library_map = lib.get_struct_library();
+
+  /* collect the rule ids and sort them in ascending order */
+  std::vector<uint32_t> entry_ids;
+  for ( auto const& entry : library_map )
+    entry_ids.push_back( entry.first );
+  std::sort( entry_ids.begin(), entry_ids.end() );
+
+  REQUIRE( entry_ids.size() == 2 ); /* abort here: entry_ids[0] and [1] are read below */
+  CHECK( entry_ids[0] % 2 == 1 );
+  auto const& first_rules = library_map.find( entry_ids[0] )->second;
+  REQUIRE( first_rules.size() == 1 ); /* abort here: first_rules[0] is read below */
+  CHECK( first_rules[0].root->root->name == "oai322" );
+  CHECK( first_rules[0].area == 8 );
+  CHECK( first_rules[0].tdelay[0] == 3.0f );
+  CHECK( first_rules[0].tdelay[1] == 3.0f );
+  CHECK( first_rules[0].tdelay[2] == 3.0f );
+  CHECK( first_rules[0].tdelay[3] == 3.0f );
+  CHECK( first_rules[0].tdelay[4] == 3.0f );
+  CHECK( first_rules[0].tdelay[5] == 3.0f );
+  CHECK( first_rules[0].polarity == 127 );
+
+  CHECK( entry_ids[1] % 2 == 1 );
+  auto const& second_rules = library_map.find( entry_ids[1] )->second;
+  REQUIRE( second_rules.size() == 1 ); /* abort here: second_rules[0] is read below */
+  CHECK( second_rules[0].root->root->name == "oai322" );
+  CHECK( second_rules[0].area == 8 );
+  CHECK( second_rules[0].tdelay[0] == 3.0f );
+  CHECK( second_rules[0].tdelay[1] == 3.0f );
+  CHECK( second_rules[0].tdelay[2] == 3.0f );
+  CHECK( second_rules[0].tdelay[3] == 3.0f );
+  CHECK( second_rules[0].tdelay[4] == 3.0f );
+  CHECK( second_rules[0].tdelay[5] == 3.0f );
+  CHECK( second_rules[0].polarity == 127 );
+}
diff --git a/test/utils/super_utils.cpp b/test/utils/super_utils.cpp
index d077a08af..00b05c7df 100644
--- a/test/utils/super_utils.cpp
+++ b/test/utils/super_utils.cpp
@@ -15,12 +15,16 @@
 
 using namespace mockturtle;
 
-std::string const genlib_library = "GATE zero 0 O=0;\n"
-                                   "GATE one 0 O=1;\n"
-                                   "GATE inverter 1 O=!a; PIN * INV 1 999 1.0 1.0 1.0 1.0\n"
-                                   "GATE buffer 2 O=a; PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n"
-                                   "GATE and 5 O=a*b; PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n"
-                                   "GATE or 5 O=a+b; PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n";
+std::string const genlib_library = "GATE zero       0 O=0;\n"
+                                   "GATE one        0 O=1;\n"
+                                   "GATE inverter   1 O=!a;           PIN * INV 1 999 1.0 1.0 1.0 1.0\n"
+                                   "GATE buffer     2 O=a;            PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n"
+                                   "GATE and        5 O=a*b;          PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n"
+                                   "GATE or         5 O=a+b;          PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n"
+                                   "GATE ha        10 O=a*b;          PIN * INV 1 999 1.7 0.4 1.7 0.4\n"
+                                   "GATE ha        10 O=!a*b+a*!b;    PIN * INV 1 999 2.1 0.4 2.1 0.4\n"
+                                   "GATE fa        16 O=a*b+a*c+b*c;  PIN * INV 1 999 2.1 0.4 2.1 0.4\n"
+                                   "GATE fa        16 O=a^b^c;        PIN * INV 1 999 3.0 0.4 3.0 0.4";
 
 std::string const super_library = "test.genlib\n"
                                   "3\n"
@@ -48,7 +52,9 @@ TEST_CASE( "Standard gates super library", "[super_utils]" )
 
   CHECK( result == lorina::return_code::success );
 
-  super_utils<3> super( gates );
+  super_utils_params ps;
+  ps.load_multioutput_in_single = false;
+  super_utils<3> super( gates, {}, ps );
 
   auto const& lib = super.get_super_library();
   CHECK( lib.size() == 6 );
@@ -58,6 +64,7 @@ TEST_CASE( "Standard gates super library", "[super_utils]" )
   CHECK( lib[0].root == &gates[0] );
   CHECK( lib[0].num_vars == 0 );
   CHECK( lib[0].function == gates[0].function );
+  CHECK( lib[0].area == 0.0f );
   CHECK( lib[0].tdelay[0] == 0 );
   CHECK( lib[0].fanin.size() == 0 );
 
@@ -66,6 +73,7 @@ TEST_CASE( "Standard gates super library", "[super_utils]" )
   CHECK( lib[1].root == &gates[1] );
   CHECK( lib[1].num_vars == 0 );
   CHECK( lib[1].function == gates[1].function );
+  CHECK( lib[1].area == 0.0f );
   CHECK( lib[1].tdelay[0] == 0 );
   CHECK( lib[1].fanin.size() == 0 );
 
@@ -74,8 +82,125 @@ TEST_CASE( "Standard gates super library", "[super_utils]" )
   CHECK( lib[2].root == &gates[2] );
   CHECK( lib[2].num_vars == 1 );
   CHECK( lib[2].function == gates[2].function );
+  CHECK( lib[2].area == 1.0f );
   CHECK( lib[2].tdelay[0] == 1 );
+  CHECK( lib[2].tdelay[1] == 0 );
+  CHECK( lib[2].fanin.size() == 0 );
+
+  CHECK( lib[3].id == 3 );
+  CHECK( lib[3].is_super == false );
+  CHECK( lib[3].root == &gates[3] );
+  CHECK( lib[3].num_vars == 1 );
+  CHECK( lib[3].function == gates[3].function );
+  CHECK( lib[3].area == 2.0f );
+  CHECK( lib[3].tdelay[0] == 1 );
   CHECK( lib[3].tdelay[1] == 0 );
+  CHECK( lib[3].fanin.size() == 0 );
+
+  CHECK( lib[4].id == 4 );
+  CHECK( lib[4].is_super == false );
+  CHECK( lib[4].root == &gates[4] );
+  CHECK( lib[4].num_vars == 2 );
+  CHECK( lib[4].function == gates[4].function );
+  CHECK( lib[4].area == 5.0f );
+  CHECK( lib[4].tdelay[0] == 1 );
+  CHECK( lib[4].tdelay[1] == 1 );
+  CHECK( lib[4].fanin.size() == 0 );
+
+  CHECK( lib[5].id == 5 );
+  CHECK( lib[5].is_super == false );
+  CHECK( lib[5].root == &gates[5] );
+  CHECK( lib[5].num_vars == 2 );
+  CHECK( lib[5].function == gates[5].function );
+  CHECK( lib[5].area == 5.0f );
+  CHECK( lib[5].tdelay[0] == 1 );
+  CHECK( lib[5].tdelay[1] == 1 );
+  CHECK( lib[5].fanin.size() == 0 );
+
+  auto const& multi_lib = super.get_multioutput_library();
+  CHECK( multi_lib.size() == 2 );
+
+  CHECK( multi_lib[0].size() == 2 );
+  CHECK( multi_lib[0][0].is_super == false );
+  CHECK( multi_lib[0][0].root == &gates[6] );
+  CHECK( multi_lib[0][0].num_vars == 2 );
+  CHECK( multi_lib[0][0].function == gates[6].function );
+  CHECK( multi_lib[0][0].area == 10.0f );
+  CHECK( multi_lib[0][0].tdelay[0] == 1.7f );
+  CHECK( multi_lib[0][0].tdelay[1] == 1.7f );
+  CHECK( multi_lib[0][0].fanin.size() == 0 );
+  CHECK( multi_lib[0][1].is_super == false );
+  CHECK( multi_lib[0][1].root == &gates[7] );
+  CHECK( multi_lib[0][1].num_vars == 2 );
+  CHECK( multi_lib[0][1].function == gates[7].function );
+  CHECK( multi_lib[0][1].area == 10.0f );
+  CHECK( multi_lib[0][1].tdelay[0] == 2.1f );
+  CHECK( multi_lib[0][1].tdelay[1] == 2.1f );
+  CHECK( multi_lib[0][1].fanin.size() == 0 );
+
+  CHECK( multi_lib[1].size() == 2 );
+  CHECK( multi_lib[1][0].is_super == false );
+  CHECK( multi_lib[1][0].root == &gates[8] );
+  CHECK( multi_lib[1][0].num_vars == 3 );
+  CHECK( multi_lib[1][0].function == gates[8].function );
+  CHECK( multi_lib[1][0].area == 16.0f );
+  CHECK( multi_lib[1][0].tdelay[0] == 2.1f );
+  CHECK( multi_lib[1][0].tdelay[1] == 2.1f );
+  CHECK( multi_lib[1][0].tdelay[2] == 2.1f );
+  CHECK( multi_lib[1][0].fanin.size() == 0 );
+  CHECK( multi_lib[1][1].is_super == false );
+  CHECK( multi_lib[1][1].root == &gates[9] );
+  CHECK( multi_lib[1][1].num_vars == 3 );
+  CHECK( multi_lib[1][1].function == gates[9].function );
+  CHECK( multi_lib[1][1].area == 16.0f );
+  CHECK( multi_lib[1][1].tdelay[0] == 3.0f );
+  CHECK( multi_lib[1][1].tdelay[1] == 3.0f );
+  CHECK( multi_lib[1][1].tdelay[2] == 3.0f );
+  CHECK( multi_lib[1][1].fanin.size() == 0 );
+}
+
+TEST_CASE( "Multi-output gates included in single-output super library", "[super_utils]" )
+{
+  std::vector<gate> gates;
+
+  std::istringstream in_genlib( genlib_library );
+  auto result = lorina::read_genlib( in_genlib, genlib_reader( gates ) );
+
+  CHECK( result == lorina::return_code::success );
+
+  super_utils_params ps;
+  ps.load_multioutput_in_single = true;
+  super_utils<3> super( gates, {}, ps );
+
+  auto const& lib = super.get_super_library();
+  CHECK( lib.size() == 10 );
+
+  CHECK( lib[0].id == 0 );
+  CHECK( lib[0].is_super == false );
+  CHECK( lib[0].root == &gates[0] );
+  CHECK( lib[0].num_vars == 0 );
+  CHECK( lib[0].function == gates[0].function );
+  CHECK( lib[0].area == 0.0f );
+  CHECK( lib[0].tdelay[0] == 0 );
+  CHECK( lib[0].fanin.size() == 0 );
+
+  CHECK( lib[1].id == 1 );
+  CHECK( lib[1].is_super == false );
+  CHECK( lib[1].root == &gates[1] );
+  CHECK( lib[1].num_vars == 0 );
+  CHECK( lib[1].function == gates[1].function );
+  CHECK( lib[1].area == 0.0f );
+  CHECK( lib[1].tdelay[0] == 0 );
+  CHECK( lib[1].fanin.size() == 0 );
+
+  CHECK( lib[2].id == 2 );
+  CHECK( lib[2].is_super == false );
+  CHECK( lib[2].root == &gates[2] );
+  CHECK( lib[2].num_vars == 1 );
+  CHECK( lib[2].function == gates[2].function );
+  CHECK( lib[2].area == 1.0f );
+  CHECK( lib[2].tdelay[0] == 1 );
+  CHECK( lib[2].tdelay[1] == 0 );
   CHECK( lib[2].fanin.size() == 0 );
 
   CHECK( lib[3].id == 3 );
@@ -83,6 +208,7 @@ TEST_CASE( "Standard gates super library", "[super_utils]" )
   CHECK( lib[3].root == &gates[3] );
   CHECK( lib[3].num_vars == 1 );
   CHECK( lib[3].function == gates[3].function );
+  CHECK( lib[3].area == 2.0f );
   CHECK( lib[3].tdelay[0] == 1 );
   CHECK( lib[3].tdelay[1] == 0 );
   CHECK( lib[3].fanin.size() == 0 );
@@ -92,6 +218,7 @@ TEST_CASE( "Standard gates super library", "[super_utils]" )
   CHECK( lib[4].root == &gates[4] );
   CHECK( lib[4].num_vars == 2 );
   CHECK( lib[4].function == gates[4].function );
+  CHECK( lib[4].area == 5.0f );
   CHECK( lib[4].tdelay[0] == 1 );
   CHECK( lib[4].tdelay[1] == 1 );
   CHECK( lib[4].fanin.size() == 0 );
@@ -101,9 +228,93 @@ TEST_CASE( "Standard gates super library", "[super_utils]" )
   CHECK( lib[5].root == &gates[5] );
   CHECK( lib[5].num_vars == 2 );
   CHECK( lib[5].function == gates[5].function );
+  CHECK( lib[5].area == 5.0f );
   CHECK( lib[5].tdelay[0] == 1 );
   CHECK( lib[5].tdelay[1] == 1 );
   CHECK( lib[5].fanin.size() == 0 );
+
+  CHECK( lib[6].id == 6 );
+  CHECK( lib[6].is_super == false );
+  CHECK( lib[6].root == &gates[6] );
+  CHECK( lib[6].num_vars == 2 );
+  CHECK( lib[6].function == gates[6].function );
+  CHECK( lib[6].area == 10.0f );
+  CHECK( lib[6].tdelay[0] == 1.7f );
+  CHECK( lib[6].tdelay[1] == 1.7f );
+  CHECK( lib[6].fanin.size() == 0 );
+
+  CHECK( lib[7].id == 7 );
+  CHECK( lib[7].is_super == false );
+  CHECK( lib[7].root == &gates[7] );
+  CHECK( lib[7].num_vars == 2 );
+  CHECK( lib[7].function == gates[7].function );
+  CHECK( lib[7].area == 10.0f );
+  CHECK( lib[7].tdelay[0] == 2.1f );
+  CHECK( lib[7].tdelay[1] == 2.1f );
+  CHECK( lib[7].fanin.size() == 0 );
+
+  CHECK( lib[8].id == 8 );
+  CHECK( lib[8].is_super == false );
+  CHECK( lib[8].root == &gates[8] );
+  CHECK( lib[8].num_vars == 3 );
+  CHECK( lib[8].function == gates[8].function );
+  CHECK( lib[8].area == 16.0f );
+  CHECK( lib[8].tdelay[0] == 2.1f );
+  CHECK( lib[8].tdelay[1] == 2.1f );
+  CHECK( lib[8].tdelay[2] == 2.1f );
+  CHECK( lib[8].fanin.size() == 0 );
+
+  CHECK( lib[9].id == 9 );
+  CHECK( lib[9].is_super == false );
+  CHECK( lib[9].root == &gates[9] );
+  CHECK( lib[9].num_vars == 3 );
+  CHECK( lib[9].function == gates[9].function );
+  CHECK( lib[9].area == 16.0f );
+  CHECK( lib[9].tdelay[0] == 3.0f );
+  CHECK( lib[9].tdelay[1] == 3.0f );
+  CHECK( lib[9].tdelay[2] == 3.0f );
+  CHECK( lib[9].fanin.size() == 0 );
+
+  auto const& multi_lib = super.get_multioutput_library();
+  CHECK( multi_lib.size() == 2 );
+
+  CHECK( multi_lib[0].size() == 2 );
+  CHECK( multi_lib[0][0].is_super == false );
+  CHECK( multi_lib[0][0].root == &gates[6] );
+  CHECK( multi_lib[0][0].num_vars == 2 );
+  CHECK( multi_lib[0][0].function == gates[6].function );
+  CHECK( multi_lib[0][0].area == 10.0f );
+  CHECK( multi_lib[0][0].tdelay[0] == 1.7f );
+  CHECK( multi_lib[0][0].tdelay[1] == 1.7f );
+  CHECK( multi_lib[0][0].fanin.size() == 0 );
+  CHECK( multi_lib[0][1].is_super == false );
+  CHECK( multi_lib[0][1].root == &gates[7] );
+  CHECK( multi_lib[0][1].num_vars == 2 );
+  CHECK( multi_lib[0][1].function == gates[7].function );
+  CHECK( multi_lib[0][1].area == 10.0f );
+  CHECK( multi_lib[0][1].tdelay[0] == 2.1f );
+  CHECK( multi_lib[0][1].tdelay[1] == 2.1f );
+  CHECK( multi_lib[0][1].fanin.size() == 0 );
+
+  CHECK( multi_lib[1].size() == 2 );
+  CHECK( multi_lib[1][0].is_super == false );
+  CHECK( multi_lib[1][0].root == &gates[8] );
+  CHECK( multi_lib[1][0].num_vars == 3 );
+  CHECK( multi_lib[1][0].function == gates[8].function );
+  CHECK( multi_lib[1][0].area == 16.0f );
+  CHECK( multi_lib[1][0].tdelay[0] == 2.1f );
+  CHECK( multi_lib[1][0].tdelay[1] == 2.1f );
+  CHECK( multi_lib[1][0].tdelay[2] == 2.1f );
+  CHECK( multi_lib[1][0].fanin.size() == 0 );
+  CHECK( multi_lib[1][1].is_super == false );
+  CHECK( multi_lib[1][1].root == &gates[9] );
+  CHECK( multi_lib[1][1].num_vars == 3 );
+  CHECK( multi_lib[1][1].function == gates[9].function );
+  CHECK( multi_lib[1][1].area == 16.0f );
+  CHECK( multi_lib[1][1].tdelay[0] == 3.0f );
+  CHECK( multi_lib[1][1].tdelay[1] == 3.0f );
+  CHECK( multi_lib[1][1].tdelay[2] == 3.0f );
+  CHECK( multi_lib[1][1].fanin.size() == 0 );
 }
 
 TEST_CASE( "Supergates super library", "[super_utils]" )
@@ -121,6 +332,8 @@ TEST_CASE( "Supergates super library", "[super_utils]" )
 
   CHECK( result == lorina::return_code::success );
 
-  super_utils<3> super( gates, super_data );
+  super_utils_params ps;
+  ps.load_multioutput_in_single = false;
+  super_utils<3> super( gates, super_data, ps );
 
   auto const& lib = super.get_super_library();
diff --git a/test/utils/tech_library.cpp b/test/utils/tech_library.cpp
index 74b7fa517..8968c4b56 100644
--- a/test/utils/tech_library.cpp
+++ b/test/utils/tech_library.cpp
@@ -45,6 +45,19 @@ std::string const super_library = "simple.genlib\n"
                                   "* and 2 9\n"
                                   "\0";
 
+std::string const multioutput_test_library = "GATE   inv1    1 O=!a;     PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                             "GATE   inv2    2 O=!a;     PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                             "GATE   buf     2 O=a;      PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
+                                             "GATE   nand2   2 O=!(a*b); PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                             "GATE   ha      6 O=a*b;    PIN * INV 1 999 1.2 0.4 1.2 0.4\n"
+                                             "GATE   ha      6 O=a^b;    PIN * INV 1 999 2.1 0.4 2.1 0.4";
+
+std::string const large_test_library = "GATE   inv1    1 O=!a;                      PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
+                                       "GATE   inv2    2 O=!a;                      PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
+                                       "GATE   buf     2 O=a;                       PIN * NONINV 1 999 1.0 0.0 1.0 0.0\n"
+                                       "GATE   nand2   2 O=!(a*b);                  PIN * INV 1 999 1.0 0.2 1.0 0.2\n"
+                                       "GATE   oai322  8 O=!((a+b+c)*(d+e)*(f+g));  PIN * INV 1 999 3.0 0.4 3.0 0.4";
+
 std::string const test_library = "GATE   inv1    3 O=!a;               PIN * INV 3 999 1.1 0.09 1.1 0.09\n"
                                  "GATE   inv2    2 O=!a;               PIN * INV 2 999 1.0 0.1 1.0 0.1\n"
                                  "GATE   inv3    1 O=!a;               PIN * INV 1 999 0.9 0.3 0.9 0.3\n"
@@ -87,20 +100,16 @@ TEST_CASE( "Simple test library generation 1", "[tech_library]" )
   kitty::static_truth_table<2> tt;
 
   kitty::create_from_hex_string( tt, "5" );
-  auto const inv = lib.get_supergates( tt );
+  auto const inv = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( inv != nullptr );
-  CHECK( inv->size() == 2 );
+  CHECK( inv->size() == 1 );  /* the other is dominated and removed */
   CHECK( ( *inv )[0].root->root->name == "inv1" );
   CHECK( ( *inv )[0].area == 1.0f );
   CHECK( ( *inv )[0].tdelay[0] == 0.9f );
   CHECK( ( *inv )[0].polarity == 0u );
-  CHECK( ( *inv )[1].root->root->name == "inv2" );
-  CHECK( ( *inv )[1].area == 2.0f );
-  CHECK( ( *inv )[1].tdelay[0] == 1.0f );
-  CHECK( ( *inv )[1].polarity == 0u );
 
   kitty::create_from_hex_string( tt, "7" );
-  auto const nand_7 = lib.get_supergates( tt );
+  auto const nand_7 = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( nand_7 != nullptr );
   CHECK( nand_7->size() == 1 );
   CHECK( ( *nand_7 )[0].root->root->name == "nand2" );
@@ -110,7 +119,7 @@ TEST_CASE( "Simple test library generation 1", "[tech_library]" )
   CHECK( ( *nand_7 )[0].polarity == 0u );
 
   kitty::create_from_hex_string( tt, "b" );
-  auto const nand_b = lib.get_supergates( tt );
+  auto const nand_b = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( nand_b != nullptr );
   CHECK( nand_b->size() == 1 );
   CHECK( ( *nand_b )[0].root->root->name == "nand2" );
@@ -120,7 +129,7 @@ TEST_CASE( "Simple test library generation 1", "[tech_library]" )
   CHECK( ( *nand_b )[0].polarity == 1u );
 
   kitty::create_from_hex_string( tt, "d" );
-  auto const nand_d = lib.get_supergates( tt );
+  auto const nand_d = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( nand_d != nullptr );
   CHECK( nand_d->size() == 1 );
   CHECK( ( *nand_d )[0].root->root->name == "nand2" );
@@ -130,7 +139,7 @@ TEST_CASE( "Simple test library generation 1", "[tech_library]" )
   CHECK( ( *nand_d )[0].polarity == 2u );
 
   kitty::create_from_hex_string( tt, "e" );
-  auto const nand_e = lib.get_supergates( tt );
+  auto const nand_e = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( nand_e != nullptr );
   CHECK( nand_e->size() == 1 );
   CHECK( ( *nand_e )[0].root->root->name == "nand2" );
@@ -149,7 +158,9 @@ TEST_CASE( "Simple test library generation 2", "[tech_library]" )
 
   CHECK( result == lorina::return_code::success );
 
-  tech_library<2, classification_type::p_configurations> lib( gates );
+  tech_library_params ps;
+  ps.remove_dominated_gates = false;
+  tech_library<2, classification_type::p_configurations> lib( gates, ps );
 
   CHECK( lib.max_gate_size() == 2 );
   CHECK( lib.get_inverter_info() == std::make_tuple( 1.0f, 0.9f, 0u ) );
@@ -158,7 +169,7 @@ TEST_CASE( "Simple test library generation 2", "[tech_library]" )
   kitty::static_truth_table<2> tt;
 
   kitty::create_from_hex_string( tt, "5" );
-  auto const inv = lib.get_supergates( tt );
+  auto const inv = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( inv != nullptr );
   CHECK( inv->size() == 2 );
   CHECK( ( *inv )[0].root->root->name == "inv1" );
@@ -171,7 +182,7 @@ TEST_CASE( "Simple test library generation 2", "[tech_library]" )
   CHECK( ( *inv )[1].polarity == 0u );
 
   kitty::create_from_hex_string( tt, "7" );
-  auto const nand_7 = lib.get_supergates( tt );
+  auto const nand_7 = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( nand_7 != nullptr );
   CHECK( nand_7->size() == 1 );
   CHECK( ( *nand_7 )[0].root->root->name == "nand2" );
@@ -181,15 +192,15 @@ TEST_CASE( "Simple test library generation 2", "[tech_library]" )
   CHECK( ( *nand_7 )[0].polarity == 0u );
 
   kitty::create_from_hex_string( tt, "b" );
-  auto const nand_b = lib.get_supergates( tt );
+  auto const nand_b = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( nand_b == nullptr );
 
   kitty::create_from_hex_string( tt, "d" );
-  auto const nand_d = lib.get_supergates( tt );
+  auto const nand_d = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( nand_d == nullptr );
 
   kitty::create_from_hex_string( tt, "e" );
-  auto const nand_e = lib.get_supergates( tt );
+  auto const nand_e = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( nand_e == nullptr );
 }
 
@@ -221,7 +232,7 @@ TEST_CASE( "Supergate library generation P", "[tech_library]" )
   kitty::static_truth_table<3> tt;
 
   kitty::create_from_hex_string( tt, "55" );
-  auto const inv = lib.get_supergates( tt );
+  auto const inv = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( inv != nullptr );
   CHECK( inv->size() == 1 );
   CHECK( ( *inv )[0].root->root->name == "inverter" );
@@ -230,7 +241,7 @@ TEST_CASE( "Supergate library generation P", "[tech_library]" )
   CHECK( ( *inv )[0].polarity == 0u );
 
   kitty::create_from_hex_string( tt, "11" );
-  auto const and_1 = lib.get_supergates( tt );
+  auto const and_1 = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( and_1 != nullptr );
   CHECK( and_1->size() == 1 );
   CHECK( ( *and_1 )[0].root->root->name == "and" );
@@ -240,19 +251,19 @@ TEST_CASE( "Supergate library generation P", "[tech_library]" )
   CHECK( ( *and_1 )[0].polarity == 3u );
 
   kitty::create_from_hex_string( tt, "22" );
-  auto const and_8 = lib.get_supergates( tt );
+  auto const and_8 = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( and_8 == nullptr );
 
   kitty::create_from_hex_string( tt, "44" );
-  auto const nand_d = lib.get_supergates( tt );
+  auto const nand_d = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( nand_d == nullptr );
 
   kitty::create_from_hex_string( tt, "88" );
-  auto const nand_e = lib.get_supergates( tt );
+  auto const nand_e = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( nand_e == nullptr );
 
   kitty::create_from_hex_string( tt, "07" );
-  auto const andor_07 = lib.get_supergates( tt );
+  auto const andor_07 = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( andor_07 != nullptr );
   CHECK( andor_07->size() == 1 );
   CHECK( ( *andor_07 )[0].root->root->name == "and" );
@@ -263,7 +274,7 @@ TEST_CASE( "Supergate library generation P", "[tech_library]" )
   CHECK( ( *andor_07 )[0].polarity == 7u );
 
   kitty::create_from_hex_string( tt, "01" );
-  auto const and_01 = lib.get_supergates( tt );
+  auto const and_01 = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( and_01 != nullptr );
   CHECK( and_01->size() == 2 );
   CHECK( ( *and_01 )[0].root->root->name == "and" );
@@ -308,7 +319,7 @@ TEST_CASE( "Supergate library generation NP", "[tech_library]" )
   kitty::static_truth_table<3> tt;
 
   kitty::create_from_hex_string( tt, "11" );
-  auto const and_1 = lib.get_supergates( tt );
+  auto const and_1 = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( and_1 != nullptr );
   CHECK( and_1->size() == 1 );
   CHECK( ( *and_1 )[0].root->root->name == "and" );
@@ -318,7 +329,7 @@ TEST_CASE( "Supergate library generation NP", "[tech_library]" )
   CHECK( ( *and_1 )[0].polarity == 3u );
 
   kitty::create_from_hex_string( tt, "22" );
-  auto const and_2 = lib.get_supergates( tt );
+  auto const and_2 = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( and_2 != nullptr );
   CHECK( and_2->size() == 1 );
   CHECK( ( *and_2 )[0].root->root->name == "and" );
@@ -328,7 +339,7 @@ TEST_CASE( "Supergate library generation NP", "[tech_library]" )
   CHECK( ( *and_2 )[0].polarity == 2u );
 
   kitty::create_from_hex_string( tt, "44" );
-  auto const and_4 = lib.get_supergates( tt );
+  auto const and_4 = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( and_4 != nullptr );
   CHECK( and_4->size() == 1 );
   CHECK( ( *and_4 )[0].root->root->name == "and" );
@@ -338,7 +349,7 @@ TEST_CASE( "Supergate library generation NP", "[tech_library]" )
   CHECK( ( *and_4 )[0].polarity == 1u );
 
   kitty::create_from_hex_string( tt, "88" );
-  auto const and_8 = lib.get_supergates( tt );
+  auto const and_8 = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( and_8 != nullptr );
   CHECK( and_8->size() == 1 );
   CHECK( ( *and_8 )[0].root->root->name == "and" );
@@ -348,7 +359,7 @@ TEST_CASE( "Supergate library generation NP", "[tech_library]" )
   CHECK( ( *and_8 )[0].polarity == 0u );
 
   kitty::create_from_hex_string( tt, "07" );
-  auto const andor_07 = lib.get_supergates( tt );
+  auto const andor_07 = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( andor_07 != nullptr );
   CHECK( andor_07->size() == 1 );
   CHECK( ( *andor_07 )[0].root->root->name == "and" );
@@ -359,7 +370,7 @@ TEST_CASE( "Supergate library generation NP", "[tech_library]" )
   CHECK( ( *andor_07 )[0].polarity == 7u );
 
   kitty::create_from_hex_string( tt, "e0" );
-  auto const andor_e0 = lib.get_supergates( tt );
+  auto const andor_e0 = lib.get_supergates( kitty::extend_to<6>( tt ) );
   CHECK( andor_e0 != nullptr );
   CHECK( andor_e0->size() == 1 );
   CHECK( ( *andor_e0 )[0].root->root->name == "and" );
@@ -370,6 +381,614 @@ TEST_CASE( "Supergate library generation NP", "[tech_library]" )
   CHECK( ( *andor_e0 )[0].polarity == 0u );
 }
 
+TEST_CASE( "Multi-output library generation 1", "[tech_library]" )
+{
+  std::vector<gate> gates;
+  /* parse multioutput_test_library and check the supergates built from it */
+  std::istringstream in( multioutput_test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+
+  REQUIRE( result == lorina::return_code::success );
+  /* multi-output gates enabled, load_multioutput_gates_single disabled */
+  tech_library_params tps;
+  tps.load_multioutput_gates = true;
+  tps.load_multioutput_gates_single = false;
+  tech_library<2, classification_type::np_configurations> lib( gates, tps );
+
+  CHECK( lib.max_gate_size() == 2 );
+  CHECK( lib.get_inverter_info() == std::make_tuple( 1.0f, 0.9f, 0u ) );
+  CHECK( lib.get_buffer_info() == std::make_tuple( 2.0f, 1.0f, 2u ) );
+
+  kitty::static_truth_table<2> tt;
+
+  kitty::create_from_hex_string( tt, "5" );
+  auto const inv = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( inv != nullptr ); /* REQUIRE: the pointer is dereferenced below */
+  REQUIRE( inv->size() == 1 );  /* the other is dominated and removed */
+  CHECK( ( *inv )[0].root->root->name == "inv1" );
+  CHECK( ( *inv )[0].area == 1.0f );
+  CHECK( ( *inv )[0].tdelay[0] == 0.9f );
+  CHECK( ( *inv )[0].polarity == 0u );
+
+  kitty::create_from_hex_string( tt, "7" );
+  auto const nand_7 = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( nand_7 != nullptr );
+  REQUIRE( nand_7->size() == 1 );
+  CHECK( ( *nand_7 )[0].root->root->name == "nand2" );
+  CHECK( ( *nand_7 )[0].area == 2.0f );
+  CHECK( ( *nand_7 )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_7 )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_7 )[0].polarity == 0u );
+
+  kitty::create_from_hex_string( tt, "b" );
+  auto const nand_b = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( nand_b != nullptr );
+  REQUIRE( nand_b->size() == 1 );
+  CHECK( ( *nand_b )[0].root->root->name == "nand2" );
+  CHECK( ( *nand_b )[0].area == 2.0f );
+  CHECK( ( *nand_b )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_b )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_b )[0].polarity == 1u );
+
+  kitty::create_from_hex_string( tt, "d" );
+  auto const nand_d = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( nand_d != nullptr );
+  REQUIRE( nand_d->size() == 1 );
+  CHECK( ( *nand_d )[0].root->root->name == "nand2" );
+  CHECK( ( *nand_d )[0].area == 2.0f );
+  CHECK( ( *nand_d )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_d )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_d )[0].polarity == 2u );
+
+  kitty::create_from_hex_string( tt, "e" );
+  auto const nand_e = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( nand_e != nullptr );
+  REQUIRE( nand_e->size() == 1 );
+  CHECK( ( *nand_e )[0].root->root->name == "nand2" );
+  CHECK( ( *nand_e )[0].area == 2.0f );
+  CHECK( ( *nand_e )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_e )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_e )[0].polarity == 3u );
+  /* two-output lookups: get_multi_supergates takes a pair of truth tables */
+  std::array<kitty::static_truth_table<6>, 2> multi_tt;
+  CHECK( lib.num_multioutput_gates() == 4 );
+
+  kitty::create_from_hex_string( tt, "6" );
+  multi_tt[0] = kitty::extend_to<6>( tt );
+  kitty::create_from_hex_string( tt, "8" );
+  multi_tt[1] = kitty::extend_to<6>( tt );
+  auto const ha_8 = lib.get_multi_supergates( multi_tt );
+  REQUIRE( ha_8 != nullptr );
+  CHECK( ha_8->size() == 2 );
+  REQUIRE( ha_8->at( 0 ).size() == 1 ); /* REQUIRE: element [0] is read below */
+  CHECK( ha_8->at( 0 )[0].root->root->name == "ha" );
+  CHECK( ha_8->at( 0 )[0].area == 3.0f );
+  CHECK( ha_8->at( 0 )[0].tdelay[0] == 2.1f );
+  CHECK( ha_8->at( 0 )[0].tdelay[1] == 2.1f );
+  CHECK( ha_8->at( 0 )[0].polarity == 0u );
+  REQUIRE( ha_8->at( 1 ).size() == 1 );
+  CHECK( ha_8->at( 1 )[0].root->root->name == "ha" );
+  CHECK( ha_8->at( 1 )[0].area == 3.0f );
+  CHECK( ha_8->at( 1 )[0].tdelay[0] == 1.2f );
+  CHECK( ha_8->at( 1 )[0].tdelay[1] == 1.2f );
+  CHECK( ha_8->at( 1 )[0].polarity == 0u );
+
+  kitty::create_from_hex_string( tt, "e" );
+  multi_tt[1] = kitty::extend_to<6>( tt );
+  auto const ha_e = lib.get_multi_supergates( multi_tt );
+  REQUIRE( ha_e != nullptr );
+  CHECK( ha_e->size() == 2 );
+  REQUIRE( ha_e->at( 0 ).size() == 1 );
+  CHECK( ha_e->at( 0 )[0].root->root->name == "ha" );
+  CHECK( ha_e->at( 0 )[0].area == 3.0f );
+  CHECK( ha_e->at( 0 )[0].tdelay[0] == 2.1f );
+  CHECK( ha_e->at( 0 )[0].tdelay[1] == 2.1f );
+  CHECK( ha_e->at( 0 )[0].polarity == 3u );
+  REQUIRE( ha_e->at( 1 ).size() == 1 );
+  CHECK( ha_e->at( 1 )[0].root->root->name == "ha" );
+  CHECK( ha_e->at( 1 )[0].area == 3.0f );
+  CHECK( ha_e->at( 1 )[0].tdelay[0] == 1.2f );
+  CHECK( ha_e->at( 1 )[0].tdelay[1] == 1.2f );
+  CHECK( ha_e->at( 1 )[0].polarity == 7u );
+
+  multi_tt[1] = multi_tt[0];
+
+  kitty::create_from_hex_string( tt, "4" );
+  multi_tt[0] = kitty::extend_to<6>( tt );
+  auto const ha_4 = lib.get_multi_supergates( multi_tt );
+  REQUIRE( ha_4 != nullptr );
+  CHECK( ha_4->size() == 2 );
+  REQUIRE( ha_4->at( 0 ).size() == 1 );
+  CHECK( ha_4->at( 0 )[0].root->root->name == "ha" );
+  CHECK( ha_4->at( 0 )[0].area == 3.0f );
+  CHECK( ha_4->at( 0 )[0].tdelay[0] == 1.2f );
+  CHECK( ha_4->at( 0 )[0].tdelay[1] == 1.2f );
+  CHECK( ha_4->at( 0 )[0].polarity == 1u );
+  REQUIRE( ha_4->at( 1 ).size() == 1 );
+  CHECK( ha_4->at( 1 )[0].root->root->name == "ha" );
+  CHECK( ha_4->at( 1 )[0].area == 3.0f );
+  CHECK( ha_4->at( 1 )[0].tdelay[0] == 2.1f );
+  CHECK( ha_4->at( 1 )[0].tdelay[1] == 2.1f );
+  CHECK( ha_4->at( 1 )[0].polarity == 5u );
+
+  kitty::create_from_hex_string( tt, "2" );
+  multi_tt[0] = kitty::extend_to<6>( tt );
+  auto const ha_2 = lib.get_multi_supergates( multi_tt );
+  REQUIRE( ha_2 != nullptr );
+  CHECK( ha_2->size() == 2 );
+  REQUIRE( ha_2->at( 0 ).size() == 1 );
+  CHECK( ha_2->at( 0 )[0].root->root->name == "ha" );
+  CHECK( ha_2->at( 0 )[0].area == 3.0f );
+  CHECK( ha_2->at( 0 )[0].tdelay[0] == 1.2f );
+  CHECK( ha_2->at( 0 )[0].tdelay[1] == 1.2f );
+  CHECK( ha_2->at( 0 )[0].polarity == 2u );
+  REQUIRE( ha_2->at( 1 ).size() == 1 );
+  CHECK( ha_2->at( 1 )[0].root->root->name == "ha" );
+  CHECK( ha_2->at( 1 )[0].area == 3.0f );
+  CHECK( ha_2->at( 1 )[0].tdelay[0] == 2.1f );
+  CHECK( ha_2->at( 1 )[0].tdelay[1] == 2.1f );
+  CHECK( ha_2->at( 1 )[0].polarity == 6u );
+  /* cells reported by the library; the "ha" cell groups two gates */
+  std::vector<standard_cell> cells = lib.get_cells();
+
+  REQUIRE( cells.size() == 5 ); /* REQUIRE: cells[0..4] are indexed below */
+  CHECK( cells[0].name == "inv1" );
+  CHECK( cells[0].id == 0 );
+  REQUIRE( cells[0].gates.size() == 1 );
+  CHECK( cells[0].gates[0].id == gates[0].id );
+  CHECK( cells[0].area == 1 );
+
+  CHECK( cells[1].name == "inv2" );
+  CHECK( cells[1].id == 1 );
+  REQUIRE( cells[1].gates.size() == 1 );
+  CHECK( cells[1].gates[0].id == gates[1].id );
+  CHECK( cells[1].area == 2 );
+
+  CHECK( cells[2].name == "buf" );
+  CHECK( cells[2].id == 2 );
+  REQUIRE( cells[2].gates.size() == 1 );
+  CHECK( cells[2].gates[0].id == gates[2].id );
+  CHECK( cells[2].area == 2 );
+
+  CHECK( cells[3].name == "nand2" );
+  CHECK( cells[3].id == 3 );
+  REQUIRE( cells[3].gates.size() == 1 );
+  CHECK( cells[3].gates[0].id == gates[3].id );
+  CHECK( cells[3].area == 2 );
+
+  CHECK( cells[4].name == "ha" );
+  CHECK( cells[4].id == 4 );
+  REQUIRE( cells[4].gates.size() == 2 );
+  CHECK( cells[4].gates[0].id == gates[4].id );
+  CHECK( cells[4].gates[1].id == gates[5].id );
+  CHECK( cells[4].area == 6 );
+}
+
+TEST_CASE( "Multi-output library generation 2", "[tech_library]" )
+{
+  std::vector<gate> gates;
+  /* parse multioutput_test_library and check the supergates built from it */
+  std::istringstream in( multioutput_test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+
+  REQUIRE( result == lorina::return_code::success );
+  /* multi-output gates enabled together with load_multioutput_gates_single */
+  tech_library_params tps;
+  tps.load_multioutput_gates = true;
+  tps.load_multioutput_gates_single = true;
+  tech_library<2, classification_type::np_configurations> lib( gates, tps );
+
+  CHECK( lib.max_gate_size() == 2 );
+  CHECK( lib.get_inverter_info() == std::make_tuple( 1.0f, 0.9f, 0u ) );
+  CHECK( lib.get_buffer_info() == std::make_tuple( 2.0f, 1.0f, 2u ) );
+
+  kitty::static_truth_table<2> tt;
+
+  kitty::create_from_hex_string( tt, "5" );
+  auto const inv = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( inv != nullptr ); /* REQUIRE: the pointer is dereferenced below */
+  REQUIRE( inv->size() == 1 );  /* the other is dominated and removed */
+  CHECK( ( *inv )[0].root->root->name == "inv1" );
+  CHECK( ( *inv )[0].area == 1.0f );
+  CHECK( ( *inv )[0].tdelay[0] == 0.9f );
+  CHECK( ( *inv )[0].polarity == 0u );
+
+  kitty::create_from_hex_string( tt, "7" );
+  auto const nand_7 = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( nand_7 != nullptr );
+  REQUIRE( nand_7->size() == 1 );
+  CHECK( ( *nand_7 )[0].root->root->name == "nand2" );
+  CHECK( ( *nand_7 )[0].area == 2.0f );
+  CHECK( ( *nand_7 )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_7 )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_7 )[0].polarity == 0u );
+
+  kitty::create_from_hex_string( tt, "b" );
+  auto const nand_b = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( nand_b != nullptr );
+  REQUIRE( nand_b->size() == 1 );
+  CHECK( ( *nand_b )[0].root->root->name == "nand2" );
+  CHECK( ( *nand_b )[0].area == 2.0f );
+  CHECK( ( *nand_b )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_b )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_b )[0].polarity == 1u );
+
+  kitty::create_from_hex_string( tt, "d" );
+  auto const nand_d = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( nand_d != nullptr );
+  REQUIRE( nand_d->size() == 1 );
+  CHECK( ( *nand_d )[0].root->root->name == "nand2" );
+  CHECK( ( *nand_d )[0].area == 2.0f );
+  CHECK( ( *nand_d )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_d )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_d )[0].polarity == 2u );
+
+  kitty::create_from_hex_string( tt, "e" );
+  auto const nand_e = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( nand_e != nullptr );
+  REQUIRE( nand_e->size() == 1 );
+  CHECK( ( *nand_e )[0].root->root->name == "nand2" );
+  CHECK( ( *nand_e )[0].area == 2.0f );
+  CHECK( ( *nand_e )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_e )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_e )[0].polarity == 3u );
+  /* two-output lookups: get_multi_supergates takes a pair of truth tables */
+  std::array<kitty::static_truth_table<6>, 2> multi_tt;
+  CHECK( lib.num_multioutput_gates() == 4 );
+
+  kitty::create_from_hex_string( tt, "6" );
+  multi_tt[0] = kitty::extend_to<6>( tt );
+  kitty::create_from_hex_string( tt, "8" );
+  multi_tt[1] = kitty::extend_to<6>( tt );
+  auto const ha_8 = lib.get_multi_supergates( multi_tt );
+  REQUIRE( ha_8 != nullptr );
+  CHECK( ha_8->size() == 2 );
+  REQUIRE( ha_8->at( 0 ).size() == 1 ); /* REQUIRE: element [0] is read below */
+  CHECK( ha_8->at( 0 )[0].root->root->name == "ha" );
+  CHECK( ha_8->at( 0 )[0].area == 3.0f );
+  CHECK( ha_8->at( 0 )[0].tdelay[0] == 2.1f );
+  CHECK( ha_8->at( 0 )[0].tdelay[1] == 2.1f );
+  CHECK( ha_8->at( 0 )[0].polarity == 0u );
+  REQUIRE( ha_8->at( 1 ).size() == 1 );
+  CHECK( ha_8->at( 1 )[0].root->root->name == "ha" );
+  CHECK( ha_8->at( 1 )[0].area == 3.0f );
+  CHECK( ha_8->at( 1 )[0].tdelay[0] == 1.2f );
+  CHECK( ha_8->at( 1 )[0].tdelay[1] == 1.2f );
+  CHECK( ha_8->at( 1 )[0].polarity == 0u );
+
+  kitty::create_from_hex_string( tt, "e" );
+  multi_tt[1] = kitty::extend_to<6>( tt );
+  auto const ha_e = lib.get_multi_supergates( multi_tt );
+  REQUIRE( ha_e != nullptr );
+  CHECK( ha_e->size() == 2 );
+  REQUIRE( ha_e->at( 0 ).size() == 1 );
+  CHECK( ha_e->at( 0 )[0].root->root->name == "ha" );
+  CHECK( ha_e->at( 0 )[0].area == 3.0f );
+  CHECK( ha_e->at( 0 )[0].tdelay[0] == 2.1f );
+  CHECK( ha_e->at( 0 )[0].tdelay[1] == 2.1f );
+  CHECK( ha_e->at( 0 )[0].polarity == 3u );
+  REQUIRE( ha_e->at( 1 ).size() == 1 );
+  CHECK( ha_e->at( 1 )[0].root->root->name == "ha" );
+  CHECK( ha_e->at( 1 )[0].area == 3.0f );
+  CHECK( ha_e->at( 1 )[0].tdelay[0] == 1.2f );
+  CHECK( ha_e->at( 1 )[0].tdelay[1] == 1.2f );
+  CHECK( ha_e->at( 1 )[0].polarity == 7u );
+
+  multi_tt[1] = multi_tt[0];
+
+  kitty::create_from_hex_string( tt, "4" );
+  multi_tt[0] = kitty::extend_to<6>( tt );
+  auto const ha_4 = lib.get_multi_supergates( multi_tt );
+  REQUIRE( ha_4 != nullptr );
+  CHECK( ha_4->size() == 2 );
+  REQUIRE( ha_4->at( 0 ).size() == 1 );
+  CHECK( ha_4->at( 0 )[0].root->root->name == "ha" );
+  CHECK( ha_4->at( 0 )[0].area == 3.0f );
+  CHECK( ha_4->at( 0 )[0].tdelay[0] == 1.2f );
+  CHECK( ha_4->at( 0 )[0].tdelay[1] == 1.2f );
+  CHECK( ha_4->at( 0 )[0].polarity == 1u );
+  REQUIRE( ha_4->at( 1 ).size() == 1 );
+  CHECK( ha_4->at( 1 )[0].root->root->name == "ha" );
+  CHECK( ha_4->at( 1 )[0].area == 3.0f );
+  CHECK( ha_4->at( 1 )[0].tdelay[0] == 2.1f );
+  CHECK( ha_4->at( 1 )[0].tdelay[1] == 2.1f );
+  CHECK( ha_4->at( 1 )[0].polarity == 5u );
+
+  kitty::create_from_hex_string( tt, "2" );
+  multi_tt[0] = kitty::extend_to<6>( tt );
+  auto const ha_2 = lib.get_multi_supergates( multi_tt );
+  REQUIRE( ha_2 != nullptr );
+  CHECK( ha_2->size() == 2 );
+  REQUIRE( ha_2->at( 0 ).size() == 1 );
+  CHECK( ha_2->at( 0 )[0].root->root->name == "ha" );
+  CHECK( ha_2->at( 0 )[0].area == 3.0f );
+  CHECK( ha_2->at( 0 )[0].tdelay[0] == 1.2f );
+  CHECK( ha_2->at( 0 )[0].tdelay[1] == 1.2f );
+  CHECK( ha_2->at( 0 )[0].polarity == 2u );
+  REQUIRE( ha_2->at( 1 ).size() == 1 );
+  CHECK( ha_2->at( 1 )[0].root->root->name == "ha" );
+  CHECK( ha_2->at( 1 )[0].area == 3.0f );
+  CHECK( ha_2->at( 1 )[0].tdelay[0] == 2.1f );
+  CHECK( ha_2->at( 1 )[0].tdelay[1] == 2.1f );
+  CHECK( ha_2->at( 1 )[0].polarity == 6u );
+}
+
+TEST_CASE( "Multi-output library generation 3", "[tech_library]" )
+{
+  std::vector<gate> gates;
+  /* parse multioutput_test_library and check the supergates built from it */
+  std::istringstream in( multioutput_test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+
+  REQUIRE( result == lorina::return_code::success );
+  /* multi-output gates, load_multioutput_gates_single, dominated gates kept */
+  tech_library_params tps;
+  tps.load_multioutput_gates = true;
+  tps.load_multioutput_gates_single = true;
+  tps.remove_dominated_gates = false;
+  tech_library<2, classification_type::np_configurations> lib( gates, tps );
+
+  CHECK( lib.max_gate_size() == 2 );
+  CHECK( lib.get_inverter_info() == std::make_tuple( 1.0f, 0.9f, 0u ) );
+  CHECK( lib.get_buffer_info() == std::make_tuple( 2.0f, 1.0f, 2u ) );
+
+  kitty::static_truth_table<2> tt;
+
+  kitty::create_from_hex_string( tt, "5" );
+  auto const inv = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( inv != nullptr ); /* REQUIRE: the pointer is dereferenced below */
+  REQUIRE( inv->size() == 2 ); /* REQUIRE: elements [0] and [1] are read below */
+  CHECK( ( *inv )[0].root->root->name == "inv1" );
+  CHECK( ( *inv )[0].area == 1.0f );
+  CHECK( ( *inv )[0].tdelay[0] == 0.9f );
+  CHECK( ( *inv )[0].polarity == 0u );
+  CHECK( ( *inv )[1].root->root->name == "inv2" );
+  CHECK( ( *inv )[1].area == 2.0f );
+  CHECK( ( *inv )[1].tdelay[0] == 1.0f );
+  CHECK( ( *inv )[1].polarity == 0u );
+
+  kitty::create_from_hex_string( tt, "7" );
+  auto const nand_7 = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( nand_7 != nullptr );
+  REQUIRE( nand_7->size() == 1 );
+  CHECK( ( *nand_7 )[0].root->root->name == "nand2" );
+  CHECK( ( *nand_7 )[0].area == 2.0f );
+  CHECK( ( *nand_7 )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_7 )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_7 )[0].polarity == 0u );
+
+  kitty::create_from_hex_string( tt, "b" );
+  auto const nand_b = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( nand_b != nullptr );
+  REQUIRE( nand_b->size() == 1 );
+  CHECK( ( *nand_b )[0].root->root->name == "nand2" );
+  CHECK( ( *nand_b )[0].area == 2.0f );
+  CHECK( ( *nand_b )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_b )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_b )[0].polarity == 1u );
+
+  kitty::create_from_hex_string( tt, "d" );
+  auto const nand_d = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( nand_d != nullptr );
+  REQUIRE( nand_d->size() == 1 );
+  CHECK( ( *nand_d )[0].root->root->name == "nand2" );
+  CHECK( ( *nand_d )[0].area == 2.0f );
+  CHECK( ( *nand_d )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_d )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_d )[0].polarity == 2u );
+
+  kitty::create_from_hex_string( tt, "e" );
+  auto const nand_e = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( nand_e != nullptr );
+  REQUIRE( nand_e->size() == 1 );
+  CHECK( ( *nand_e )[0].root->root->name == "nand2" );
+  CHECK( ( *nand_e )[0].area == 2.0f );
+  CHECK( ( *nand_e )[0].tdelay[0] == 1.0f );
+  CHECK( ( *nand_e )[0].tdelay[1] == 1.0f );
+  CHECK( ( *nand_e )[0].polarity == 3u );
+  /* "ha" is also returned by single-output lookups here, with area 6.0 */
+  kitty::create_from_hex_string( tt, "8" );
+  auto const and_8 = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( and_8 != nullptr );
+  REQUIRE( and_8->size() == 1 );
+  CHECK( ( *and_8 )[0].root->root->name == "ha" );
+  CHECK( ( *and_8 )[0].area == 6.0f );
+  CHECK( ( *and_8 )[0].tdelay[0] == 1.2f );
+  CHECK( ( *and_8 )[0].tdelay[1] == 1.2f );
+  CHECK( ( *and_8 )[0].polarity == 0u );
+
+  kitty::create_from_hex_string( tt, "4" );
+  auto const and_4 = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( and_4 != nullptr );
+  REQUIRE( and_4->size() == 1 );
+  CHECK( ( *and_4 )[0].root->root->name == "ha" );
+  CHECK( ( *and_4 )[0].area == 6.0f );
+  CHECK( ( *and_4 )[0].tdelay[0] == 1.2f );
+  CHECK( ( *and_4 )[0].tdelay[1] == 1.2f );
+  CHECK( ( *and_4 )[0].polarity == 1u );
+
+  kitty::create_from_hex_string( tt, "2" );
+  auto const and_2 = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( and_2 != nullptr );
+  REQUIRE( and_2->size() == 1 );
+  CHECK( ( *and_2 )[0].root->root->name == "ha" );
+  CHECK( ( *and_2 )[0].area == 6.0f );
+  CHECK( ( *and_2 )[0].tdelay[0] == 1.2f );
+  CHECK( ( *and_2 )[0].tdelay[1] == 1.2f );
+  CHECK( ( *and_2 )[0].polarity == 2u );
+
+  kitty::create_from_hex_string( tt, "1" );
+  auto const and_1 = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( and_1 != nullptr );
+  REQUIRE( and_1->size() == 1 );
+  CHECK( ( *and_1 )[0].root->root->name == "ha" );
+  CHECK( ( *and_1 )[0].area == 6.0f );
+  CHECK( ( *and_1 )[0].tdelay[0] == 1.2f );
+  CHECK( ( *and_1 )[0].tdelay[1] == 1.2f );
+  CHECK( ( *and_1 )[0].polarity == 3u );
+
+  kitty::create_from_hex_string( tt, "6" );
+  auto const xor_6 = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( xor_6 != nullptr );
+  REQUIRE( xor_6->size() == 2 );
+  CHECK( ( *xor_6 )[0].root->root->name == "ha" );
+  CHECK( ( *xor_6 )[0].area == 6.0f );
+  CHECK( ( *xor_6 )[0].tdelay[0] == 2.1f );
+  CHECK( ( *xor_6 )[0].tdelay[1] == 2.1f );
+  CHECK( ( *xor_6 )[0].polarity == 0u );
+  CHECK( ( *xor_6 )[1].root->root->name == "ha" );
+  CHECK( ( *xor_6 )[1].area == 6.0f );
+  CHECK( ( *xor_6 )[1].tdelay[0] == 2.1f );
+  CHECK( ( *xor_6 )[1].tdelay[1] == 2.1f );
+  CHECK( ( *xor_6 )[1].polarity == 3u );
+
+  kitty::create_from_hex_string( tt, "9" );
+  auto const xor_9 = lib.get_supergates( kitty::extend_to<6>( tt ) );
+  REQUIRE( xor_9 != nullptr );
+  REQUIRE( xor_9->size() == 2 );
+  CHECK( ( *xor_9 )[0].root->root->name == "ha" );
+  CHECK( ( *xor_9 )[0].area == 6.0f );
+  CHECK( ( *xor_9 )[0].tdelay[0] == 2.1f );
+  CHECK( ( *xor_9 )[0].tdelay[1] == 2.1f );
+  CHECK( ( *xor_9 )[0].polarity == 1u );
+  CHECK( ( *xor_9 )[1].root->root->name == "ha" );
+  CHECK( ( *xor_9 )[1].area == 6.0f );
+  CHECK( ( *xor_9 )[1].tdelay[0] == 2.1f );
+  CHECK( ( *xor_9 )[1].tdelay[1] == 2.1f );
+  CHECK( ( *xor_9 )[1].polarity == 2u );
+  /* two-output lookups: get_multi_supergates takes a pair of truth tables */
+  std::array<kitty::static_truth_table<6>, 2> multi_tt;
+  CHECK( lib.num_multioutput_gates() == 4 );
+
+  kitty::create_from_hex_string( tt, "6" );
+  multi_tt[0] = kitty::extend_to<6>( tt );
+  kitty::create_from_hex_string( tt, "8" );
+  multi_tt[1] = kitty::extend_to<6>( tt );
+  auto const ha_8 = lib.get_multi_supergates( multi_tt );
+  REQUIRE( ha_8 != nullptr );
+  CHECK( ha_8->size() == 2 );
+  REQUIRE( ha_8->at( 0 ).size() == 1 ); /* REQUIRE: element [0] is read below */
+  CHECK( ha_8->at( 0 )[0].root->root->name == "ha" );
+  CHECK( ha_8->at( 0 )[0].area == 3.0f );
+  CHECK( ha_8->at( 0 )[0].tdelay[0] == 2.1f );
+  CHECK( ha_8->at( 0 )[0].tdelay[1] == 2.1f );
+  CHECK( ha_8->at( 0 )[0].polarity == 0u );
+  REQUIRE( ha_8->at( 1 ).size() == 1 );
+  CHECK( ha_8->at( 1 )[0].root->root->name == "ha" );
+  CHECK( ha_8->at( 1 )[0].area == 3.0f );
+  CHECK( ha_8->at( 1 )[0].tdelay[0] == 1.2f );
+  CHECK( ha_8->at( 1 )[0].tdelay[1] == 1.2f );
+  CHECK( ha_8->at( 1 )[0].polarity == 0u );
+
+  kitty::create_from_hex_string( tt, "e" );
+  multi_tt[1] = kitty::extend_to<6>( tt );
+  auto const ha_e = lib.get_multi_supergates( multi_tt );
+  REQUIRE( ha_e != nullptr );
+  CHECK( ha_e->size() == 2 );
+  REQUIRE( ha_e->at( 0 ).size() == 1 );
+  CHECK( ha_e->at( 0 )[0].root->root->name == "ha" );
+  CHECK( ha_e->at( 0 )[0].area == 3.0f );
+  CHECK( ha_e->at( 0 )[0].tdelay[0] == 2.1f );
+  CHECK( ha_e->at( 0 )[0].tdelay[1] == 2.1f );
+  CHECK( ha_e->at( 0 )[0].polarity == 3u );
+  REQUIRE( ha_e->at( 1 ).size() == 1 );
+  CHECK( ha_e->at( 1 )[0].root->root->name == "ha" );
+  CHECK( ha_e->at( 1 )[0].area == 3.0f );
+  CHECK( ha_e->at( 1 )[0].tdelay[0] == 1.2f );
+  CHECK( ha_e->at( 1 )[0].tdelay[1] == 1.2f );
+  CHECK( ha_e->at( 1 )[0].polarity == 7u );
+
+  multi_tt[1] = multi_tt[0];
+
+  kitty::create_from_hex_string( tt, "4" );
+  multi_tt[0] = kitty::extend_to<6>( tt );
+  auto const ha_4 = lib.get_multi_supergates( multi_tt );
+  REQUIRE( ha_4 != nullptr );
+  CHECK( ha_4->size() == 2 );
+  REQUIRE( ha_4->at( 0 ).size() == 1 );
+  CHECK( ha_4->at( 0 )[0].root->root->name == "ha" );
+  CHECK( ha_4->at( 0 )[0].area == 3.0f );
+  CHECK( ha_4->at( 0 )[0].tdelay[0] == 1.2f );
+  CHECK( ha_4->at( 0 )[0].tdelay[1] == 1.2f );
+  CHECK( ha_4->at( 0 )[0].polarity == 1u );
+  REQUIRE( ha_4->at( 1 ).size() == 1 );
+  CHECK( ha_4->at( 1 )[0].root->root->name == "ha" );
+  CHECK( ha_4->at( 1 )[0].area == 3.0f );
+  CHECK( ha_4->at( 1 )[0].tdelay[0] == 2.1f );
+  CHECK( ha_4->at( 1 )[0].tdelay[1] == 2.1f );
+  CHECK( ha_4->at( 1 )[0].polarity == 5u );
+
+  kitty::create_from_hex_string( tt, "2" );
+  multi_tt[0] = kitty::extend_to<6>( tt );
+  auto const ha_2 = lib.get_multi_supergates( multi_tt );
+  REQUIRE( ha_2 != nullptr );
+  CHECK( ha_2->size() == 2 );
+  REQUIRE( ha_2->at( 0 ).size() == 1 );
+  CHECK( ha_2->at( 0 )[0].root->root->name == "ha" );
+  CHECK( ha_2->at( 0 )[0].area == 3.0f );
+  CHECK( ha_2->at( 0 )[0].tdelay[0] == 1.2f );
+  CHECK( ha_2->at( 0 )[0].tdelay[1] == 1.2f );
+  CHECK( ha_2->at( 0 )[0].polarity == 2u );
+  REQUIRE( ha_2->at( 1 ).size() == 1 );
+  CHECK( ha_2->at( 1 )[0].root->root->name == "ha" );
+  CHECK( ha_2->at( 1 )[0].area == 3.0f );
+  CHECK( ha_2->at( 1 )[0].tdelay[0] == 2.1f );
+  CHECK( ha_2->at( 1 )[0].tdelay[1] == 2.1f );
+  CHECK( ha_2->at( 1 )[0].polarity == 6u );
+}
+
+TEST_CASE( "Large library generation", "[tech_library]" )
+{
+  std::vector<gate> gates;
+  /* parse large_test_library and query the structural patterns built from it */
+  std::istringstream in( large_test_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+
+  REQUIRE( result == lorina::return_code::success );
+  /* load_large_gates is enabled on a tech_library<7> */
+  tech_library_params tps;
+  tps.load_large_gates = true;
+  tech_library<7> lib( gates, tps );
+
+  CHECK( lib.max_gate_size() == 2 );
+  CHECK( lib.get_inverter_info() == std::make_tuple( 1.0f, 0.9f, 0u ) );
+  CHECK( lib.get_buffer_info() == std::make_tuple( 2.0f, 1.0f, 2u ) );
+  /* ids are chained: ( id << 1 ), optionally | 1, feeds the next lookup */
+  uint32_t pattern_and1 = lib.get_pattern_id( 3, 3 );
+  REQUIRE( pattern_and1 != UINT32_MAX ); /* REQUIRE: the id is reused below */
+  uint32_t pattern_and2 = lib.get_pattern_id( 3, pattern_and1 << 1 );
+  REQUIRE( pattern_and2 != UINT32_MAX );
+  uint32_t pattern_and3 = lib.get_pattern_id( 3, 2 );
+  REQUIRE( pattern_and3 != UINT32_MAX );
+  uint32_t pattern_and4 = lib.get_pattern_id( 2, 3 );
+  REQUIRE( pattern_and4 != UINT32_MAX );
+  uint32_t pattern_and5 = lib.get_pattern_id( ( pattern_and3 << 1 ) | 1, ( pattern_and4 << 1 ) | 1 );
+  REQUIRE( pattern_and5 != UINT32_MAX );
+  uint32_t pattern_and6 = lib.get_pattern_id( ( pattern_and2 << 1 ) | 1, pattern_and5 << 1 );
+  REQUIRE( pattern_and6 != UINT32_MAX );
+  uint32_t pattern_and7 = lib.get_pattern_id( ( pattern_and2 << 1 ) | 1, ( pattern_and3 << 1 ) | 1 );
+  REQUIRE( pattern_and7 != UINT32_MAX );
+  uint32_t pattern_and8 = lib.get_pattern_id( ( pattern_and4 << 1 ) | 1, pattern_and7 << 1 );
+  REQUIRE( pattern_and8 != UINT32_MAX );
+  /* only ids 1, 3, 4, 6 and 8 yield supergates, and only with the flag set */
+  CHECK( lib.get_supergates_pattern( pattern_and1, true ) != nullptr );
+  CHECK( lib.get_supergates_pattern( pattern_and1, false ) == nullptr );
+  CHECK( lib.get_supergates_pattern( pattern_and2, true ) == nullptr );
+  CHECK( lib.get_supergates_pattern( pattern_and2, false ) == nullptr );
+  CHECK( lib.get_supergates_pattern( pattern_and3, true ) != nullptr );
+  CHECK( lib.get_supergates_pattern( pattern_and3, false ) == nullptr );
+  CHECK( lib.get_supergates_pattern( pattern_and4, true ) != nullptr );
+  CHECK( lib.get_supergates_pattern( pattern_and4, false ) == nullptr );
+  CHECK( lib.get_supergates_pattern( pattern_and5, true ) == nullptr );
+  CHECK( lib.get_supergates_pattern( pattern_and5, false ) == nullptr );
+  CHECK( lib.get_supergates_pattern( pattern_and6, true ) != nullptr );
+  CHECK( lib.get_supergates_pattern( pattern_and6, false ) == nullptr );
+  CHECK( lib.get_supergates_pattern( pattern_and7, true ) == nullptr );
+  CHECK( lib.get_supergates_pattern( pattern_and7, false ) == nullptr );
+  CHECK( lib.get_supergates_pattern( pattern_and8, true ) != nullptr );
+  CHECK( lib.get_supergates_pattern( pattern_and8, false ) == nullptr );
+}
+
 TEST_CASE( "Complete library generation", "[tech_library]" )
 {
   std::vector<gate> gates;
@@ -379,7 +998,9 @@ TEST_CASE( "Complete library generation", "[tech_library]" )
 
   CHECK( result == lorina::return_code::success );
 
-  tech_library<4, classification_type::np_configurations> lib( gates );
+  tech_library_params ps;
+  ps.remove_dominated_gates = false;
+  tech_library<4, classification_type::np_configurations> lib( gates, ps );
 
   CHECK( lib.max_gate_size() == 4 );
   CHECK( lib.get_inverter_info() == std::make_tuple( 1.0f, 0.9f, 2u ) );
@@ -389,7 +1010,7 @@ TEST_CASE( "Complete library generation", "[tech_library]" )
     auto const tt = gate.function;
 
     const auto test_enumeration = [&]( auto const& tt, auto, auto ) {
-      const auto static_tt = kitty::extend_to<4>( tt );
+      const auto static_tt = kitty::extend_to<6>( tt );
 
       auto const supergates = lib.get_supergates( static_tt );
 
diff --git a/test/views/cell_view.cpp b/test/views/cell_view.cpp
new file mode 100644
index 000000000..0f55e45aa
--- /dev/null
+++ b/test/views/cell_view.cpp
@@ -0,0 +1,202 @@
+#include <catch.hpp>
+
+#include <sstream>
+
+#include <lorina/genlib.hpp>
+#include <mockturtle/io/genlib_reader.hpp>
+#include <mockturtle/networks/block.hpp>
+#include <mockturtle/utils/standard_cell.hpp>
+#include <mockturtle/views/cell_view.hpp>
+
+using namespace mockturtle;
+/* Genlib text shared by the tests below; the two "ha" lines end up in a single two-gate cell. */
+std::string const simple_library = "GATE zero 0 O=CONST0;\n"
+                                   "GATE one 0 O=CONST1;\n"
+                                   "GATE inverter 1 O=!a; PIN * INV 1 999 1.0 1.0 1.0 1.0\n"
+                                   "GATE buffer 2 O=a; PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n"
+                                   "GATE and 5 O=a*b; PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n"
+                                   "GATE or 5 O=a+b; PIN * NONINV 1 999 1.0 1.0 1.0 1.0\n"
+                                   "GATE ha 7 O=!(a*b); PIN * INV 1 999 1.0 1.0 1.0 1.0\n"
+                                   "GATE ha 7 O=!a*!b+a*b; PIN * INV 1 999 2.0 1.0 2.0 1.0\n";
+
+TEST_CASE( "Create cell view", "[cell_view]" )
+{
+  std::vector<gate> gates;
+  /* build a cell_view<block_network> over the cells parsed from simple_library */
+  std::istringstream in( simple_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+
+  REQUIRE( result == lorina::return_code::success );
+
+  std::vector<standard_cell> cells = get_standard_cells( gates );
+
+  REQUIRE( cells.size() == 7 ); /* REQUIRE: cell indices up to 6 are used below */
+
+  cell_view<block_network> ntk( cells );
+
+  auto const a = ntk.create_pi();
+  auto const b = ntk.create_pi();
+  auto const c = ntk.create_pi();
+  auto const d = ntk.create_pi();
+
+  auto const c0 = ntk.get_constant( false );
+  auto const t1 = ntk.create_and( a, b );
+  auto const t2 = ntk.create_or( c, d );
+  auto const t3 = ntk.create_hai( t1, d );
+  auto const f = ntk.create_and( t1, t2 );
+  auto const g = ntk.create_not( a );
+
+  ntk.create_po( t3 );
+  ntk.create_po( ntk.next_output_pin( t3 ) );
+  ntk.create_po( f );
+  ntk.create_po( g );
+  ntk.create_po( ntk.get_constant() );
+  /* bind nodes to cells by index: 0 = zero, 2 = inverter, 4 = and, 5 = or, 6 = ha */
+  ntk.add_cell( ntk.get_node( c0 ), 0 );
+  ntk.add_cell( ntk.get_node( t1 ), 4 );
+  ntk.add_cell( ntk.get_node( t2 ), 5 );
+  ntk.add_cell( ntk.get_node( t3 ), 6 );
+  ntk.add_cell( ntk.get_node( f ), 4 );
+  ntk.add_cell( ntk.get_node( g ), 2 );
+
+  CHECK( ntk.has_cell( ntk.get_node( a ) ) == false );
+  CHECK( ntk.has_cell( ntk.get_node( b ) ) == false );
+  CHECK( ntk.has_cell( ntk.get_node( c ) ) == false );
+  CHECK( ntk.has_cell( ntk.get_node( d ) ) == false );
+  CHECK( ntk.has_cell( ntk.get_node( c0 ) ) == true );
+  CHECK( ntk.has_cell( ntk.get_node( t1 ) ) == true );
+  CHECK( ntk.has_cell( ntk.get_node( t2 ) ) == true );
+  CHECK( ntk.has_cell( ntk.get_node( t3 ) ) == true );
+  CHECK( ntk.has_cell( ntk.get_node( f ) ) == true );
+  CHECK( ntk.has_cell( ntk.get_node( g ) ) == true );
+
+  CHECK( ntk.get_cell_index( ntk.get_node( c0 ) ) == 0 );
+  CHECK( ntk.get_cell_index( ntk.get_node( t1 ) ) == 4 );
+  CHECK( ntk.get_cell_index( ntk.get_node( t2 ) ) == 5 );
+  CHECK( ntk.get_cell_index( ntk.get_node( t3 ) ) == 6 );
+  CHECK( ntk.get_cell_index( ntk.get_node( f ) ) == 4 );
+  CHECK( ntk.get_cell_index( ntk.get_node( g ) ) == 2 );
+
+  CHECK( ntk.get_cell( ntk.get_node( c0 ) ).name == "zero" );
+  CHECK( ntk.get_cell( ntk.get_node( t1 ) ).name == "and" );
+  CHECK( ntk.get_cell( ntk.get_node( t2 ) ).name == "or" );
+  CHECK( ntk.get_cell( ntk.get_node( t3 ) ).name == "ha" );
+  CHECK( ntk.get_cell( ntk.get_node( f ) ).name == "and" );
+  CHECK( ntk.get_cell( ntk.get_node( g ) ).name == "inverter" );
+
+  CHECK( ntk.get_cell( ntk.get_node( c0 ) ).gates.size() == 1 );
+  CHECK( ntk.get_cell( ntk.get_node( t1 ) ).gates.size() == 1 );
+  CHECK( ntk.get_cell( ntk.get_node( t2 ) ).gates.size() == 1 );
+  CHECK( ntk.get_cell( ntk.get_node( t3 ) ).gates.size() == 2 );
+  CHECK( ntk.get_cell( ntk.get_node( f ) ).gates.size() == 1 );
+  CHECK( ntk.get_cell( ntk.get_node( g ) ).gates.size() == 1 );
+
+  CHECK( ntk.compute_area() == 23 );
+  CHECK( ntk.compute_worst_delay() == 3 );
+
+  std::stringstream report_stats;
+  ntk.report_stats( report_stats );
+  CHECK( report_stats.str() == "[i] Report stats: area = 23.00; delay =  3.00;\n" );
+
+  std::stringstream report_gates;
+  ntk.report_cells_usage( report_gates );
+  CHECK( report_gates.str() == "[i] Report cells usage:\n"
+                               "[i] zero                     \t Instance =          1\t Area =         0.00     0.00 %\n"
+                               "[i] inverter                 \t Instance =          1\t Area =         1.00     4.35 %\n"
+                               "[i] and                      \t Instance =          2\t Area =        10.00    43.48 %\n"
+                               "[i] or                       \t Instance =          1\t Area =         5.00    21.74 %\n"
+                               "[i] ha                       \t Instance =          1\t Area =         7.00    30.43 %\n"
+                               "[i] TOTAL                    \t Instance =          6\t Area =        23.00   100.00 %\n" );
+}
+
+TEST_CASE( "Cell view on copy", "[cell_view]" )
+{
+  std::vector<gate> gates;
+  /* build a cell_view<block_network>, copy it, and run every check on the copy */
+  std::istringstream in( simple_library );
+  auto result = lorina::read_genlib( in, genlib_reader( gates ) );
+
+  REQUIRE( result == lorina::return_code::success );
+
+  std::vector<standard_cell> cells = get_standard_cells( gates );
+
+  REQUIRE( cells.size() == 7 ); /* REQUIRE: cell indices up to 6 are used below */
+
+  cell_view<block_network> ntk( cells );
+
+  auto const a = ntk.create_pi();
+  auto const b = ntk.create_pi();
+  auto const c = ntk.create_pi();
+  auto const d = ntk.create_pi();
+
+  auto const c0 = ntk.get_constant( false );
+  auto const t1 = ntk.create_and( a, b );
+  auto const t2 = ntk.create_or( c, d );
+  auto const t3 = ntk.create_hai( t1, d );
+  auto const f = ntk.create_and( t1, t2 );
+  auto const g = ntk.create_not( a );
+
+  ntk.create_po( t3 );
+  ntk.create_po( ntk.next_output_pin( t3 ) );
+  ntk.create_po( f );
+  ntk.create_po( g );
+  ntk.create_po( ntk.get_constant() );
+  /* bind nodes to cells by index: 0 = zero, 2 = inverter, 4 = and, 5 = or, 6 = ha */
+  ntk.add_cell( ntk.get_node( c0 ), 0 );
+  ntk.add_cell( ntk.get_node( t1 ), 4 );
+  ntk.add_cell( ntk.get_node( t2 ), 5 );
+  ntk.add_cell( ntk.get_node( t3 ), 6 );
+  ntk.add_cell( ntk.get_node( f ), 4 );
+  ntk.add_cell( ntk.get_node( g ), 2 );
+
+  cell_view<block_network> ntk_copy = ntk;
+
+  CHECK( ntk_copy.has_cell( ntk_copy.get_node( a ) ) == false );
+  CHECK( ntk_copy.has_cell( ntk_copy.get_node( b ) ) == false );
+  CHECK( ntk_copy.has_cell( ntk_copy.get_node( c ) ) == false );
+  CHECK( ntk_copy.has_cell( ntk_copy.get_node( d ) ) == false );
+  CHECK( ntk_copy.has_cell( ntk_copy.get_node( c0 ) ) == true );
+  CHECK( ntk_copy.has_cell( ntk_copy.get_node( t1 ) ) == true );
+  CHECK( ntk_copy.has_cell( ntk_copy.get_node( t2 ) ) == true );
+  CHECK( ntk_copy.has_cell( ntk_copy.get_node( t3 ) ) == true );
+  CHECK( ntk_copy.has_cell( ntk_copy.get_node( f ) ) == true );
+  CHECK( ntk_copy.has_cell( ntk_copy.get_node( g ) ) == true );
+
+  CHECK( ntk_copy.get_cell_index( ntk_copy.get_node( c0 ) ) == 0 );
+  CHECK( ntk_copy.get_cell_index( ntk_copy.get_node( t1 ) ) == 4 );
+  CHECK( ntk_copy.get_cell_index( ntk_copy.get_node( t2 ) ) == 5 );
+  CHECK( ntk_copy.get_cell_index( ntk_copy.get_node( t3 ) ) == 6 );
+  CHECK( ntk_copy.get_cell_index( ntk_copy.get_node( f ) ) == 4 );
+  CHECK( ntk_copy.get_cell_index( ntk_copy.get_node( g ) ) == 2 );
+
+  CHECK( ntk_copy.get_cell( ntk_copy.get_node( c0 ) ).name == "zero" );
+  CHECK( ntk_copy.get_cell( ntk_copy.get_node( t1 ) ).name == "and" );
+  CHECK( ntk_copy.get_cell( ntk_copy.get_node( t2 ) ).name == "or" );
+  CHECK( ntk_copy.get_cell( ntk_copy.get_node( t3 ) ).name == "ha" );
+  CHECK( ntk_copy.get_cell( ntk_copy.get_node( f ) ).name == "and" );
+  CHECK( ntk_copy.get_cell( ntk_copy.get_node( g ) ).name == "inverter" );
+
+  CHECK( ntk_copy.get_cell( ntk_copy.get_node( c0 ) ).gates.size() == 1 );
+  CHECK( ntk_copy.get_cell( ntk_copy.get_node( t1 ) ).gates.size() == 1 );
+  CHECK( ntk_copy.get_cell( ntk_copy.get_node( t2 ) ).gates.size() == 1 );
+  CHECK( ntk_copy.get_cell( ntk_copy.get_node( t3 ) ).gates.size() == 2 );
+  CHECK( ntk_copy.get_cell( ntk_copy.get_node( f ) ).gates.size() == 1 );
+  CHECK( ntk_copy.get_cell( ntk_copy.get_node( g ) ).gates.size() == 1 );
+
+  CHECK( ntk_copy.compute_area() == 23 );
+  CHECK( ntk_copy.compute_worst_delay() == 3 );
+
+  std::stringstream report_stats;
+  ntk_copy.report_stats( report_stats );
+  CHECK( report_stats.str() == "[i] Report stats: area = 23.00; delay =  3.00;\n" );
+
+  std::stringstream report_gates;
+  ntk_copy.report_cells_usage( report_gates );
+  CHECK( report_gates.str() == "[i] Report cells usage:\n"
+                               "[i] zero                     \t Instance =          1\t Area =         0.00     0.00 %\n"
+                               "[i] inverter                 \t Instance =          1\t Area =         1.00     4.35 %\n"
+                               "[i] and                      \t Instance =          2\t Area =        10.00    43.48 %\n"
+                               "[i] or                       \t Instance =          1\t Area =         5.00    21.74 %\n"
+                               "[i] ha                       \t Instance =          1\t Area =         7.00    30.43 %\n"
+                               "[i] TOTAL                    \t Instance =          6\t Area =        23.00   100.00 %\n" );
+}
diff --git a/test/views/dont_touch_view.cpp b/test/views/dont_touch_view.cpp
new file mode 100644
index 000000000..58a78f468
--- /dev/null
+++ b/test/views/dont_touch_view.cpp
@@ -0,0 +1,192 @@
+#include <catch.hpp>
+
+#include <sstream>
+
+#include <mockturtle/networks/klut.hpp>
+#include <mockturtle/views/dont_touch_view.hpp>
+
+using namespace mockturtle;
+
+TEST_CASE( "Create dont touch view 1", "[dont_touch_view]" )
+{
+  /* default-constructed view; the network is built directly through it */
+  dont_touch_view<klut_network> ntk{};
+
+  auto const pi_a = ntk.create_pi();
+  auto const pi_b = ntk.create_pi();
+  auto const pi_c = ntk.create_pi();
+  auto const pi_d = ntk.create_pi();
+
+  auto const zero = ntk.get_constant( false );
+  auto const and_ab = ntk.create_and( pi_a, pi_b );
+  auto const or_cd = ntk.create_or( pi_c, pi_d );
+  auto const and_top = ntk.create_and( and_ab, or_cd );
+  auto const not_a = ntk.create_not( pi_a );
+
+  ntk.create_po( and_top );
+  ntk.create_po( not_a );
+  ntk.create_po( ntk.get_constant() );
+
+  /* mark the constant and the four created gates; the primary inputs stay unmarked */
+  ntk.select_dont_touch( ntk.get_node( zero ) );
+  ntk.select_dont_touch( ntk.get_node( and_ab ) );
+  ntk.select_dont_touch( ntk.get_node( or_cd ) );
+  ntk.select_dont_touch( ntk.get_node( and_top ) );
+  ntk.select_dont_touch( ntk.get_node( not_a ) );
+
+  CHECK( ntk.is_dont_touch( ntk.get_node( pi_a ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( pi_b ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( pi_c ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( pi_d ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( zero ) ) == true );
+  CHECK( ntk.is_dont_touch( ntk.get_node( and_ab ) ) == true );
+  CHECK( ntk.is_dont_touch( ntk.get_node( or_cd ) ) == true );
+  CHECK( ntk.is_dont_touch( ntk.get_node( and_top ) ) == true );
+  CHECK( ntk.is_dont_touch( ntk.get_node( not_a ) ) == true );
+
+  /* tally the nodes handed out by foreach_dont_touch: all five marked ones are expected */
+  uint32_t num_marked = 0;
+  ntk.foreach_dont_touch( [&num_marked]( auto const& ) { ++num_marked; } );
+  CHECK( num_marked == 5 );
+
+  /* drop the marks of the two inner gates, then re-check every node */
+  ntk.remove_dont_touch( ntk.get_node( and_ab ) );
+  ntk.remove_dont_touch( ntk.get_node( or_cd ) );
+
+  CHECK( ntk.is_dont_touch( ntk.get_node( pi_a ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( pi_b ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( pi_c ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( pi_d ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( zero ) ) == true );
+  CHECK( ntk.is_dont_touch( ntk.get_node( and_ab ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( or_cd ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( and_top ) ) == true );
+  CHECK( ntk.is_dont_touch( ntk.get_node( not_a ) ) == true );
+}
+
+TEST_CASE( "Create dont touch view 2", "[dont_touch_view]" )
+{
+  klut_network klut;
+
+  auto const pi_a = klut.create_pi();
+  auto const pi_b = klut.create_pi();
+  auto const pi_c = klut.create_pi();
+  auto const pi_d = klut.create_pi();
+
+  auto const zero = klut.get_constant( false );
+  auto const and_ab = klut.create_and( pi_a, pi_b );
+  auto const or_cd = klut.create_or( pi_c, pi_d );
+  auto const and_top = klut.create_and( and_ab, or_cd );
+  auto const not_a = klut.create_not( pi_a );
+
+  klut.create_po( and_top );
+  klut.create_po( not_a );
+  klut.create_po( klut.get_constant() );
+
+  /* wrap the already built network; all marks below are managed through the view */
+  dont_touch_view<klut_network> view{ klut };
+
+  /* NOTE(review): or_cd is selected twice while the count below expects 5 - presumably an idempotence check, confirm */
+  view.select_dont_touch( view.get_node( zero ) );
+  view.select_dont_touch( view.get_node( and_ab ) );
+  view.select_dont_touch( view.get_node( or_cd ) );
+  view.select_dont_touch( view.get_node( or_cd ) );
+  view.select_dont_touch( view.get_node( and_top ) );
+  view.select_dont_touch( view.get_node( not_a ) );
+
+  CHECK( view.is_dont_touch( view.get_node( pi_a ) ) == false );
+  CHECK( view.is_dont_touch( view.get_node( pi_b ) ) == false );
+  CHECK( view.is_dont_touch( view.get_node( pi_c ) ) == false );
+  CHECK( view.is_dont_touch( view.get_node( pi_d ) ) == false );
+  CHECK( view.is_dont_touch( view.get_node( zero ) ) == true );
+  CHECK( view.is_dont_touch( view.get_node( and_ab ) ) == true );
+  CHECK( view.is_dont_touch( view.get_node( or_cd ) ) == true );
+  CHECK( view.is_dont_touch( view.get_node( and_top ) ) == true );
+  CHECK( view.is_dont_touch( view.get_node( not_a ) ) == true );
+
+  /* tally the nodes handed out by foreach_dont_touch: five distinct marked nodes are expected */
+  uint32_t num_marked = 0;
+  view.foreach_dont_touch( [&num_marked]( auto const& ) { ++num_marked; } );
+  CHECK( num_marked == 5 );
+
+  /* drop the marks of the two inner gates, then re-check every node */
+  view.remove_dont_touch( view.get_node( and_ab ) );
+  view.remove_dont_touch( view.get_node( or_cd ) );
+
+  CHECK( view.is_dont_touch( view.get_node( pi_a ) ) == false );
+  CHECK( view.is_dont_touch( view.get_node( pi_b ) ) == false );
+  CHECK( view.is_dont_touch( view.get_node( pi_c ) ) == false );
+  CHECK( view.is_dont_touch( view.get_node( pi_d ) ) == false );
+  CHECK( view.is_dont_touch( view.get_node( zero ) ) == true );
+  CHECK( view.is_dont_touch( view.get_node( and_ab ) ) == false );
+  CHECK( view.is_dont_touch( view.get_node( or_cd ) ) == false );
+  CHECK( view.is_dont_touch( view.get_node( and_top ) ) == true );
+  CHECK( view.is_dont_touch( view.get_node( not_a ) ) == true );
+}
+
+TEST_CASE( "Dont touch view on copy", "[dont_touch_view]" )
+{
+  dont_touch_view<klut_network> ntk{};
+
+  auto const a = ntk.create_pi();
+  auto const b = ntk.create_pi();
+  auto const c = ntk.create_pi();
+  auto const d = ntk.create_pi();
+
+  auto const c0 = ntk.get_constant( false );
+  auto const t1 = ntk.create_and( a, b );
+  auto const t2 = ntk.create_or( c, d );
+  auto const f = ntk.create_and( t1, t2 );
+  auto const g = ntk.create_not( a );
+
+  ntk.create_po( f );
+  ntk.create_po( g );
+  ntk.create_po( ntk.get_constant() );
+
+  /* mark the constant and the four created gates; the primary inputs stay unmarked */
+  ntk.select_dont_touch( ntk.get_node( c0 ) );
+  ntk.select_dont_touch( ntk.get_node( t1 ) );
+  ntk.select_dont_touch( ntk.get_node( t2 ) );
+  ntk.select_dont_touch( ntk.get_node( f ) );
+  ntk.select_dont_touch( ntk.get_node( g ) );
+
+  /* copy-initialize a second view from the marked one */
+  dont_touch_view<klut_network> ntk_copy = ntk;
+
+  /* the copy is expected to report the same marks as the original */
+  CHECK( ntk_copy.is_dont_touch( ntk_copy.get_node( a ) ) == false );
+  CHECK( ntk_copy.is_dont_touch( ntk_copy.get_node( b ) ) == false );
+  CHECK( ntk_copy.is_dont_touch( ntk_copy.get_node( c ) ) == false );
+  CHECK( ntk_copy.is_dont_touch( ntk_copy.get_node( d ) ) == false );
+  CHECK( ntk_copy.is_dont_touch( ntk_copy.get_node( c0 ) ) == true );
+  CHECK( ntk_copy.is_dont_touch( ntk_copy.get_node( t1 ) ) == true );
+  CHECK( ntk_copy.is_dont_touch( ntk_copy.get_node( t2 ) ) == true );
+  CHECK( ntk_copy.is_dont_touch( ntk_copy.get_node( f ) ) == true );
+  CHECK( ntk_copy.is_dont_touch( ntk_copy.get_node( g ) ) == true );
+
+  /* iterate the copy (not the original) so that enumeration of the copied marks is exercised */
+  uint32_t count = 0;
+  ntk_copy.foreach_dont_touch( [&]( auto const& ) { ++count; } );
+
+  CHECK( count == 5 );
+
+  /* remove two marks from the original view and re-check the original */
+  ntk.remove_dont_touch( ntk.get_node( t1 ) );
+  ntk.remove_dont_touch( ntk.get_node( t2 ) );
+
+  CHECK( ntk.is_dont_touch( ntk.get_node( a ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( b ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( c ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( d ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( c0 ) ) == true );
+  CHECK( ntk.is_dont_touch( ntk.get_node( t1 ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( t2 ) ) == false );
+  CHECK( ntk.is_dont_touch( ntk.get_node( f ) ) == true );
+  CHECK( ntk.is_dont_touch( ntk.get_node( g ) ) == true );
+
+  /* c0, f and g are the three marks expected to remain on the original */
+  count = 0;
+  ntk.foreach_dont_touch( [&]( auto const& ) { ++count; } );
+
+  CHECK( count == 3 );
+}