diff --git a/passes/techmap/Makefile.inc b/passes/techmap/Makefile.inc index 74813bca93f..0282af79045 100644 --- a/passes/techmap/Makefile.inc +++ b/passes/techmap/Makefile.inc @@ -49,6 +49,7 @@ OBJS += passes/techmap/dffunmap.o OBJS += passes/techmap/flowmap.o OBJS += passes/techmap/extractinv.o OBJS += passes/techmap/cellmatch.o +OBJS += passes/techmap/clockgate.o endif ifeq ($(DISABLE_SPAWN),0) diff --git a/passes/techmap/clockgate.cc b/passes/techmap/clockgate.cc new file mode 100644 index 00000000000..bf53b02bbc8 --- /dev/null +++ b/passes/techmap/clockgate.cc @@ -0,0 +1,227 @@ +#include "kernel/yosys.h" +#include "kernel/ff.h" +#include + +USING_YOSYS_NAMESPACE +PRIVATE_NAMESPACE_BEGIN + +struct ClockGateCell { + IdString name; + IdString ce_pin; + IdString clk_in_pin; + IdString clk_out_pin; +}; + +ClockGateCell icg_from_arg(std::string& name, std::string& str) { + ClockGateCell c; + c.name = RTLIL::escape_id(name); + char delimiter = ':'; + size_t pos1 = str.find(delimiter); + if (pos1 == std::string::npos) + log_cmd_error("Not enough ports in descriptor string"); + size_t pos2 = str.find(delimiter, pos1 + 1); + if (pos2 == std::string::npos) + log_cmd_error("Not enough ports in descriptor string"); + size_t pos3 = str.find(delimiter, pos2 + 1); + if (pos3 != std::string::npos) + log_cmd_error("Too many ports in descriptor string"); + + std::string ce = str.substr(0, pos1); + c.ce_pin = RTLIL::escape_id(ce); + + std::string clk_in = str.substr(pos1 + 1, pos2 - (pos1 + 1)); + c.clk_in_pin = RTLIL::escape_id(clk_in); + + std::string clk_out = str.substr(pos2 + 1, str.size() - (pos2 + 1)); + c.clk_out_pin = RTLIL::escape_id(clk_out); + return c; +} + +struct ClockgatePass : public Pass { + ClockgatePass() : Pass("clockgate", "extract clock gating out of flip flops") { } + void help() override { + // |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| + log("\n"); + log(" clockgate [options] [selection]\n"); + log("\n"); + 
log("This pass transforms each set of FFs sharing the same clock and\n"); + log("enable signal into a clock-gating cell and a set of enable-less FFs.\n"); + log("Primarily a power-saving transformation on ASIC designs.\n"); + log("\n"); + log(" -pos ::\n"); + log(" If specified, rising-edge FFs will have CE inputs\n"); + log(" removed and a gated clock will be created by the\n"); + log(" user-specified ICG (integrated clock gating)\n"); + log(" cell with ports named , , .\n"); + log(" The ICG's clock enable pin must be active high.\n"); + log(" -neg ::\n"); + log(" If specified, falling-edge FFs will have CE inputs\n"); + log(" removed and a gated clock will be created by the\n"); + log(" user-specified ICG (integrated clock gating)\n"); + log(" cell with ports named , , .\n"); + log(" The ICG's clock enable pin must be active high.\n"); + log(" -tie_lo \n"); + log(" Port of the ICG will be tied to zero.\n"); + log(" Intended for DFT scan-enable pins.\n"); + log(" -min_net_size \n"); + log(" Only transform sets of at least eligible FFs.\n"); + // log(" \n"); + } + + // One ICG will be generated per ClkNetInfo + // if the number of FFs associated with it is sufficent + struct ClkNetInfo { + // Original, ungated clock into enabled FF + SigBit clk_bit; + // Original clock enable into enabled FF + SigBit ce_bit; + bool pol_clk; + bool pol_ce; + unsigned int hash() const { + auto t = std::make_tuple(clk_bit, ce_bit, pol_clk, pol_ce); + unsigned int h = mkhash_init; + h = mkhash(h, hash_ops::hash(t)); + return h; + } + bool operator==(const ClkNetInfo& other) const { + return (clk_bit == other.clk_bit) && + (ce_bit == other.ce_bit) && + (pol_clk == other.pol_clk) && + (pol_ce == other.pol_ce); + } + }; + + struct GClkNetInfo { + // How many CE FFs on this CLK net have we seen? 
+ int net_size; + // After ICG generation, we have new gated CLK signals + Wire* new_net; + }; + + ClkNetInfo clk_info_from_ff(FfData& ff) { + SigBit clk = ff.sig_clk[0]; + SigBit ce = ff.sig_ce[0]; + ClkNetInfo info{clk, ce, ff.pol_clk, ff.pol_ce}; + return info; + } + + void execute(std::vector args, RTLIL::Design *design) override { + log_header(design, "Executing CLOCK_GATE pass (extract clock gating out of flip flops).\n"); + + std::optional pos_icg_desc; + std::optional neg_icg_desc; + std::vector tie_lo_ports; + int min_net_size = 0; + + size_t argidx; + for (argidx = 1; argidx < args.size(); argidx++) { + if (args[argidx] == "-pos" && argidx+2 < args.size()) { + auto name = args[++argidx]; + auto rest = args[++argidx]; + pos_icg_desc = icg_from_arg(name, rest); + } + if (args[argidx] == "-neg" && argidx+2 < args.size()) { + auto name = args[++argidx]; + auto rest = args[++argidx]; + neg_icg_desc = icg_from_arg(name, rest); + } + if (args[argidx] == "-tie_lo" && argidx+1 < args.size()) { + tie_lo_ports.push_back(RTLIL::escape_id(args[++argidx])); + } + if (args[argidx] == "-min_net_size" && argidx+1 < args.size()) { + min_net_size = atoi(args[++argidx].c_str()); + } + } + + extra_args(args, argidx, design); + + pool ce_ffs; + dict clk_nets; + + int gated_flop_count = 0; + for (auto module : design->selected_whole_modules()) { + for (auto cell : module->cells()) { + if (!RTLIL::builtin_ff_cell_types().count(cell->type)) + continue; + + FfData ff(nullptr, cell); + // It would be odd to get constants, but we better handle it + if (ff.has_ce) { + if (!ff.sig_clk.is_bit() || !ff.sig_ce.is_bit()) + continue; + if (!ff.sig_clk[0].is_wire() || !ff.sig_ce[0].is_wire()) + continue; + + ce_ffs.insert(cell); + + ClkNetInfo info = clk_info_from_ff(ff); + auto it = clk_nets.find(info); + if (it == clk_nets.end()) + clk_nets[info] = GClkNetInfo(); + clk_nets[info].net_size++; + } + } + + for (auto& clk_net : clk_nets) { + auto& clk = clk_net.first; + auto& gclk = 
clk_net.second; + + if (gclk.net_size < min_net_size) + continue; + + std::optional matching_icg_desc; + + if (pos_icg_desc && clk.pol_clk) + matching_icg_desc = pos_icg_desc; + else if (neg_icg_desc && !clk.pol_clk) + matching_icg_desc = neg_icg_desc; + + if (!matching_icg_desc) + continue; + + Cell* icg = module->addCell(NEW_ID, matching_icg_desc->name); + icg->setPort(matching_icg_desc->ce_pin, clk.ce_bit); + icg->setPort(matching_icg_desc->clk_in_pin, clk.clk_bit); + gclk.new_net = module->addWire(NEW_ID); + icg->setPort(matching_icg_desc->clk_out_pin, gclk.new_net); + // Tie low DFT ports like scan chain enable + for (auto port : tie_lo_ports) + icg->setPort(port, Const(0, 1)); + // Fix CE polarity if needed + if (!clk.pol_ce) { + SigBit ce_fixed_pol = module->NotGate(NEW_ID, clk.ce_bit); + icg->setPort(matching_icg_desc->ce_pin, ce_fixed_pol); + } + } + + for (auto cell : ce_ffs) { + FfData ff(nullptr, cell); + ClkNetInfo info = clk_info_from_ff(ff); + auto it = clk_nets.find(info); + log_assert(it != clk_nets.end() && "Bug: desync ce_ffs and clk_nets"); + + if (!it->second.new_net) + continue; + + log_debug("Fix up FF %s\n", cell->name.c_str()); + // Now we start messing with the design + ff.has_ce = false; + // Construct the clock gate + // ICG = integrated clock gate, industry shorthand + ff.sig_clk = (*it).second.new_net; + + // Rebuild the flop + (void)ff.emit(); + + gated_flop_count++; + } + ce_ffs.clear(); + clk_nets.clear(); + } + + log("Converted %d FFs.\n", gated_flop_count); + } +} ClockgatePass; + + +PRIVATE_NAMESPACE_END diff --git a/tests/techmap/clockgate.ys b/tests/techmap/clockgate.ys new file mode 100644 index 00000000000..dac4de0ecc5 --- /dev/null +++ b/tests/techmap/clockgate.ys @@ -0,0 +1,196 @@ +read_verilog << EOT + +module dffe_00( input clk, en, + input d1, output reg q1, + ); + always @( negedge clk ) begin + if ( ~en ) + q1 <= d1; + end +endmodule + +module dffe_01( input clk, en, + input d1, output reg q1, + ); + always @( negedge 
clk ) begin
+        if ( en )
+            q1 <= d1;
+    end
+endmodule
+
+module dffe_10( input clk, en,
+                input d1, output reg q1,
+                );
+    always @( posedge clk ) begin
+        if ( ~en )
+            q1 <= d1;
+    end
+endmodule
+
+module dffe_11( input clk, en,
+                input d1, output reg q1,
+                );
+    always @( posedge clk ) begin
+        if ( en )
+            q1 <= d1;
+    end
+endmodule
+
+module dffe_wide_11( input clk, en,
+                input [3:0] d1, output reg [3:0] q1,
+                );
+    always @( posedge clk ) begin
+        if ( en )
+            q1 <= d1;
+    end
+endmodule
+
+EOT
+
+proc
+opt
+
+design -save before
+
+#------------------------------------------------------------------------------
+
+# Test -pos
+
+clockgate -pos pdk_icg ce:clkin:clkout -tie_lo scanen
+
+# falling edge clock flops don't get matched on -pos
+select -module dffe_00 -assert-count 0 t:\\pdk_icg
+select -module dffe_01 -assert-count 0 t:\\pdk_icg
+# rising edge clock flops do get matched on -pos
+select -module dffe_10 -assert-count 1 t:\\pdk_icg
+select -module dffe_11 -assert-count 1 t:\\pdk_icg
+# if necessary, EN is inverted, since the given ICG
+# is assumed to have an active-high EN
+select -module dffe_10 -assert-count 1 t:\$_NOT_
+select -module dffe_11 -assert-count 0 t:\$_NOT_
+
+# Extra credit: multi-bit FFs work fine as well
+select -module dffe_wide_11 -assert-count 1 t:\\pdk_icg
+
+#------------------------------------------------------------------------------
+
+# Test -neg
+
+design -load before
+clockgate -min_net_size 1 -neg pdk_icg ce:clkin:clkout -tie_lo scanen
+
+# falling edge clock flops do get matched on -neg
+select -module dffe_00 -assert-count 1 t:\\pdk_icg
+select -module dffe_01 -assert-count 1 t:\\pdk_icg
+# rising edge clock flops don't get matched on -neg
+select -module dffe_10 -assert-count 0 t:\\pdk_icg
+select -module dffe_11 -assert-count 0 t:\\pdk_icg
+# if necessary, EN is inverted, since the given ICG
+# is assumed to have an active-high EN
+select -module dffe_00 -assert-count 1 t:\$_NOT_
+select -module dffe_01 -assert-count 0 t:\$_NOT_
+
+#------------------------------------------------------------------------------
+
+# Same as first case, but on fine-grained cells
+
+design -load before
+
+techmap
+
+clockgate -pos pdk_icg ce:clkin:clkout -tie_lo scanen
+
+# falling edge clock flops don't get matched on -pos
+select -module dffe_00 -assert-count 0 t:\\pdk_icg
+select -module dffe_01 -assert-count 0 t:\\pdk_icg
+# rising edge clock flops do get matched on -pos
+select -module dffe_10 -assert-count 1 t:\\pdk_icg
+select -module dffe_11 -assert-count 1 t:\\pdk_icg
+# if necessary, EN is inverted, since the given ICG
+# is assumed to have an active-high EN
+select -module dffe_10 -assert-count 1 t:\$_NOT_
+select -module dffe_11 -assert-count 0 t:\$_NOT_
+
+# Extra credit: multi-bit FFs work fine as well
+select -module dffe_wide_11 -assert-count 1 t:\\pdk_icg
+
+#------------------------------------------------------------------------------
+
+design -load before
+clockgate -min_net_size 2 -neg pdk_icg ce:clkin:clkout -tie_lo scanen
+
+# No FF set sharing a (clock, clock enable) pair is large enough
+select -module dffe_00 -assert-count 0 t:\\pdk_icg
+select -module dffe_01 -assert-count 0 t:\\pdk_icg
+select -module dffe_10 -assert-count 0 t:\\pdk_icg
+select -module dffe_11 -assert-count 0 t:\\pdk_icg
+
+#------------------------------------------------------------------------------
+
+design -reset
+read_rtlil << EOT
+
+module \bad1
+  wire input 1 \clk
+  wire input 3 \d1
+  wire input 2 \en
+  wire output 4 \q1
+  cell $dffe $auto$ff.cc:266:slice$27
+    parameter \CLK_POLARITY 1
+    parameter \EN_POLARITY 1
+    parameter \WIDTH 1
+    connect \CLK \clk
+    connect \D \d1
+    connect \EN 1'1
+    connect \Q \q1
+  end
+end
+
+module \bad2
+  wire input 1 \clk
+  wire input 3 \d1
+  wire input 2 \en
+  wire output 4 \q1
+  cell $dffe $auto$ff.cc:266:slice$27
+    parameter \CLK_POLARITY 1
+    parameter \EN_POLARITY 1
+    parameter \WIDTH 1
+    connect \CLK 1'1
+    connect \D \d1
+    connect \EN \en
+    connect \Q \q1
+  end
+end
+
+EOT
+
+# Check we don't choke on constants
+clockgate -pos pdk_icg ce:clkin:clkout -tie_lo scanen
+select -module bad1 -assert-count 0 t:\\pdk_icg
+select -module bad2 -assert-count 0 t:\\pdk_icg
+
+#------------------------------------------------------------------------------
+
+# Regression test: EN is a bit from a multi-bit wire
+design -reset
+read_verilog << EOT
+module dffe_wide_11( input clk, input [1:0] en,
+                input [3:0] d1, output reg [3:0] q1,
+                );
+    always @( posedge clk ) begin
+        if ( en[0] )
+            q1 <= d1;
+    end
+endmodule
+
+EOT
+
+proc
+opt
+
+clockgate -pos pdk_icg ce:clkin:clkout -tie_lo scanen
+select -assert-count 1 t:\\pdk_icg
+
+#------------------------------------------------------------------------------
+
+# TODO test -tie_lo