diff --git a/passes/pmgen/Makefile.inc b/passes/pmgen/Makefile.inc index a7ef642825c..c2257b72083 100644 --- a/passes/pmgen/Makefile.inc +++ b/passes/pmgen/Makefile.inc @@ -42,7 +42,8 @@ GENFILES += passes/pmgen/peepopt_pm.h passes/pmgen/peepopt.o: passes/pmgen/peepopt_pm.h $(eval $(call add_extra_objs,passes/pmgen/peepopt_pm.h)) -PEEPOPT_PATTERN = passes/pmgen/peepopt_shiftmul.pmg +PEEPOPT_PATTERN = passes/pmgen/peepopt_shiftmul_right.pmg +PEEPOPT_PATTERN += passes/pmgen/peepopt_shiftmul_left.pmg PEEPOPT_PATTERN += passes/pmgen/peepopt_muldiv.pmg passes/pmgen/peepopt_pm.h: passes/pmgen/pmgen.py $(PEEPOPT_PATTERN) diff --git a/passes/pmgen/README.md b/passes/pmgen/README.md index 39560839f0f..3205be1b55e 100644 --- a/passes/pmgen/README.md +++ b/passes/pmgen/README.md @@ -212,7 +212,7 @@ second argument, and the matcher will iterate over those options: index port(eq, BA) === bar set eq_ab AB set eq_ba BA - generate + endmatch Notice how `define` can be used to define additional local variables similar to the loop variables defined by `slice` and `choice`. diff --git a/passes/pmgen/peepopt.cc b/passes/pmgen/peepopt.cc index 9a497c91441..bf751f0154b 100644 --- a/passes/pmgen/peepopt.cc +++ b/passes/pmgen/peepopt.cc @@ -24,11 +24,8 @@ USING_YOSYS_NAMESPACE PRIVATE_NAMESPACE_BEGIN bool did_something; -dict initbits; -pool rminitbits; #include "passes/pmgen/peepopt_pm.h" -#include "generate.h" struct PeepoptPass : public Pass { PeepoptPass() : Pass("peepopt", "collection of peephole optimizers") { } @@ -40,38 +37,38 @@ struct PeepoptPass : public Pass { log("\n"); log("This pass applies a collection of peephole optimizers to the current design.\n"); log("\n"); + log("This pass employs the following rules:\n"); + log("\n"); + log(" * muldiv - Replace (A*B)/B with A\n"); + log("\n"); + log(" * shiftmul - Replace A>>(B*C) with A'>>(B< args, RTLIL::Design *design) override { - std::string genmode; - log_header(design, "Executing PEEPOPT pass (run peephole optimizers).\n"); + bool bmux_mode = false, wshift_mode = false; size_t argidx; for (argidx = 1; argidx < args.size(); argidx++) { - if (args[argidx] == "-generate" && argidx+1 < args.size()) { - genmode = args[++argidx]; + if (args[argidx] == "-bmux") { + bmux_mode = true; + continue; + } + if (args[argidx] == "-wshift") { + wshift_mode = true; continue; } break; } extra_args(args, argidx, design); - if (!genmode.empty()) - { - initbits.clear(); - rminitbits.clear(); - - if (genmode == "shiftmul") - GENERATE_PATTERN(peepopt_pm, shiftmul); - else if (genmode == "muldiv") - GENERATE_PATTERN(peepopt_pm, muldiv); - else - log_abort(); - return; - } - for (auto module : design->selected_modules()) { did_something = true; @@ -79,47 +76,17 @@ struct PeepoptPass : public Pass { while (did_something) { did_something = false; - initbits.clear(); - rminitbits.clear(); peepopt_pm pm(module); - for (auto w : module->wires()) { - auto it = w->attributes.find(ID::init); - if (it != w->attributes.end()) { - SigSpec sig = pm.sigmap(w); - Const val = it->second; - int len = std::min(GetSize(sig), GetSize(val)); - for (int i = 0; i < len; i++) { - if (sig[i].wire == nullptr) - continue; - if (val[i] != State::S0 && val[i] != State::S1) - continue; - initbits[sig[i]] = val[i]; - } - } - } - pm.setup(module->selected_cells()); - pm.run_shiftmul(); - pm.run_muldiv(); - - for (auto w : module->wires()) { - auto it = w->attributes.find(ID::init); - if (it != w->attributes.end()) { - SigSpec sig = pm.sigmap(w); - Const &val = it->second; - int len = std::min(GetSize(sig), GetSize(val)); - for (int i = 0; i < len; i++) { - if (rminitbits.count(sig[i])) - val[i] = State::Sx; - } - } - } + pm.ud_shiftmul_right.bmux_mode = bmux_mode; + pm.ud_shiftmul_right.wshift_mode = wshift_mode; - initbits.clear(); - rminitbits.clear(); + pm.run_shiftmul_right(); + pm.run_shiftmul_left(); + pm.run_muldiv(); } } } diff --git a/passes/pmgen/peepopt_shiftmul.pmg b/passes/pmgen/peepopt_shiftmul.pmg deleted file mode 100644 index d71fbf74449..00000000000 --- a/passes/pmgen/peepopt_shiftmul.pmg +++ /dev/null @@ -1,92 +0,0 @@ -pattern shiftmul -// -// Optimize mul+shift pairs that result from expressions such as foo[s*W+:W] -// - -state shamt - -match shift - select shift->type.in($shift, $shiftx, $shr) -endmatch - -code shamt - shamt = port(shift, \B); - if (shamt.empty()) - reject; - if (shamt[GetSize(shamt)-1] == State::S0) { - do { - shamt.remove(GetSize(shamt)-1); - if (shamt.empty()) - reject; - } while (shamt[GetSize(shamt)-1] == State::S0); - } else - if (shift->type.in($shift, $shiftx) && param(shift, \B_SIGNED).as_bool()) { - reject; - } - if (GetSize(shamt) > 20) - reject; -endcode - -match mul - select mul->type.in($mul) - select port(mul, \A).is_fully_const() || port(mul, \B).is_fully_const() - index port(mul, \Y) === shamt - filter !param(mul, \A_SIGNED).as_bool() -endmatch - -code -{ - IdString const_factor_port = port(mul, \A).is_fully_const() ? \A : \B; - Const const_factor_cnst = port(mul, const_factor_port).as_const(); - int const_factor = const_factor_cnst.as_int(); - - if (GetSize(const_factor_cnst) == 0) - reject; - - if (GetSize(const_factor_cnst) > 20) - reject; - - if (GetSize(port(shift, \Y)) > const_factor) - reject; - - int factor_bits = ceil_log2(const_factor); - SigSpec mul_din = port(mul, const_factor_port == \A ? \B : \A); - - if (GetSize(shamt) < factor_bits+GetSize(mul_din)) - reject; - - did_something = true; - log("shiftmul pattern in %s: shift=%s, mul=%s\n", log_id(module), log_id(shift), log_id(mul)); - - int new_const_factor = 1 << factor_bits; - SigSpec padding(State::Sx, new_const_factor-const_factor); - SigSpec old_a = port(shift, \A), new_a; - int trunc = 0; - - if (GetSize(old_a) % const_factor != 0) { - trunc = const_factor - GetSize(old_a) % const_factor; - old_a.append(SigSpec(State::Sx, trunc)); - } - - for (int i = 0; i*const_factor < GetSize(old_a); i++) { - SigSpec slice = old_a.extract(i*const_factor, const_factor); - new_a.append(slice); - new_a.append(padding); - } - - if (trunc > 0) - new_a.remove(GetSize(new_a)-trunc, trunc); - - SigSpec new_b = {mul_din, SigSpec(State::S0, factor_bits)}; - if (param(shift, \B_SIGNED).as_bool()) - new_b.append(State::S0); - - shift->setPort(\A, new_a); - shift->setParam(\A_WIDTH, GetSize(new_a)); - shift->setPort(\B, new_b); - shift->setParam(\B_WIDTH, GetSize(new_b)); - - blacklist(shift); - accept; -} -endcode diff --git a/passes/pmgen/peepopt_shiftmul_left.pmg b/passes/pmgen/peepopt_shiftmul_left.pmg new file mode 100644 index 00000000000..607f8368c53 --- /dev/null +++ b/passes/pmgen/peepopt_shiftmul_left.pmg @@ -0,0 +1,160 @@ +pattern shiftmul_left +// +// Optimize mul+shift pairs that result from expressions such as foo[s*W+:W] +// + +match shift + select shift->type.in($shift, $shiftx, $shl) + select shift->type.in($shl) || param(shift, \B_SIGNED).as_bool() + filter !port(shift, \B).empty() +endmatch + +match neg + if shift->type.in($shift, $shiftx) + select neg->type == $neg + index port(neg, \Y) === port(shift, \B) + filter !port(shift, \A).empty() +endmatch + +// the left shift amount +state shift_amount +// log2 scale factor in interpreting of shift_amount +// due to zero padding on the shift cell's B port +state log2scale + +code shift_amount log2scale + if (neg) { + // case of `$shift`, `$shiftx` + shift_amount = port(neg, \A); + if (!param(neg, \A_SIGNED).as_bool()) + shift_amount.append(State::S0); + } else { + // case of `$shl` + shift_amount = port(shift, \B); + if (!param(shift, \B_SIGNED).as_bool()) + shift_amount.append(State::S0); + } + + // at this point shift_amount is signed, make + // sure we can never go negative + if (shift_amount.bits().back() != State::S0) + reject; + + while (shift_amount.bits().back() == State::S0) { + shift_amount.remove(GetSize(shift_amount) - 1); + if (shift_amount.empty()) reject; + } + + log2scale = 0; + while (shift_amount[0] == State::S0) { + shift_amount.remove(0); + if (shift_amount.empty()) reject; + log2scale++; + } + + if (GetSize(shift_amount) > 20) + reject; +endcode + +state mul_din +state mul_const + +match mul + select mul->type.in($mul) + index port(mul, \Y) === shift_amount + filter !param(mul, \A_SIGNED).as_bool() + + choice constport {\A, \B} + filter port(mul, constport).is_fully_const() + + define varport (constport == \A ? \B : \A) + set mul_const SigSpec({port(mul, constport), SigSpec(State::S0, log2scale)}).as_const() + // get mul_din unmapped (so no `port()` shorthand) + // because we will be using it to set the \A port + // on the shift cell, and we want to stay close + // to the original design + set mul_din mul->getPort(varport) +endmatch + +code +{ + if (mul_const.empty() || GetSize(mul_const) > 20) + reject; + + // make sure there's no overlap in the signal + // selections by the shiftmul pattern + if (GetSize(port(shift, \A)) > mul_const.as_int()) + reject; + + int factor_bits = ceil_log2(mul_const.as_int()); + // make sure the multiplication never wraps around + if (GetSize(shift_amount) < factor_bits + GetSize(mul_din)) + reject; + + if (neg) { + // make sure the negation never wraps around + if (GetSize(port(shift, \B)) < factor_bits + GetSize(mul_din) + + log2scale + 1) + reject; + } + + did_something = true; + log("left shiftmul pattern in %s: shift=%s, mul=%s\n", log_id(module), log_id(shift), log_id(mul)); + + int const_factor = mul_const.as_int(); + int new_const_factor = 1 << factor_bits; + SigSpec padding(State::Sm, new_const_factor-const_factor); + SigSpec old_y = port(shift, \Y), new_y; + int trunc = 0; + + if (GetSize(old_y) % const_factor != 0) { + trunc = const_factor - GetSize(old_y) % const_factor; + old_y.append(SigSpec(State::Sm, trunc)); + } + + for (int i = 0; i*const_factor < GetSize(old_y); i++) { + SigSpec slice = old_y.extract(i*const_factor, const_factor); + new_y.append(slice); + new_y.append(padding); + } + + if (trunc > 0) + new_y.remove(GetSize(new_y)-trunc, trunc); + + { + // Now replace occurences of Sm in new_y with bits + // of a dummy wire + int padbits = 0; + for (auto bit : new_y) + if (bit == SigBit(State::Sm)) + padbits++; + + SigSpec padwire = module->addWire(NEW_ID, padbits); + + for (int i = new_y.size() - 1; i >= 0; i--) + if (new_y[i] == SigBit(State::Sm)) { + new_y[i] = padwire.bits().back(); + padwire.remove(padwire.size() - 1); + } + } + + SigSpec new_b = {mul_din, SigSpec(State::S0, factor_bits)}; + + shift->setPort(\Y, new_y); + shift->setParam(\Y_WIDTH, GetSize(new_y)); + if (shift->type == $shl) { + if (param(shift, \B_SIGNED).as_bool()) + new_b.append(State::S0); + shift->setPort(\B, new_b); + shift->setParam(\B_WIDTH, GetSize(new_b)); + } else { + SigSpec b_neg = module->addWire(NEW_ID, GetSize(new_b) + 1); + module->addNeg(NEW_ID, new_b, b_neg); + shift->setPort(\B, b_neg); + shift->setParam(\B_WIDTH, GetSize(b_neg)); + } + + blacklist(shift); + accept; +} +endcode diff --git a/passes/pmgen/peepopt_shiftmul_right.pmg b/passes/pmgen/peepopt_shiftmul_right.pmg new file mode 100644 index 00000000000..23f00d38cf6 --- /dev/null +++ b/passes/pmgen/peepopt_shiftmul_right.pmg @@ -0,0 +1,183 @@ +pattern shiftmul_right +// +// Optimize mul+shift pairs that result from expressions such as foo[s*W+:W] +// + +udata bmux_mode wshift_mode + +match shift + select shift->type.in($shift, $shiftx, $shr) + filter !port(shift, \B).empty() +endmatch + +// the right shift amount +state shift_amount +// log2 scale factor in interpreting of shift_amount +// due to zero padding on the shift cell's B port +state log2scale + +code shift_amount log2scale + shift_amount = port(shift, \B); + if (shift->type.in($shr) || !param(shift, \B_SIGNED).as_bool()) + shift_amount.append(State::S0); + + // at this point shift_amount is signed, make + // sure we can never go negative + if (shift_amount.bits().back() != State::S0) + reject; + + while (shift_amount.bits().back() == State::S0) { + shift_amount.remove(GetSize(shift_amount) - 1); + if (shift_amount.empty()) reject; + } + + log2scale = 0; + while (shift_amount[0] == State::S0) { + shift_amount.remove(0); + if (shift_amount.empty()) reject; + log2scale++; + } + + if (GetSize(shift_amount) > 20) + reject; +endcode + +state mul_din +state mul_const + +match mul + select mul->type.in($mul) + index port(mul, \Y) === shift_amount + filter !param(mul, \A_SIGNED).as_bool() + + choice constport {\A, \B} + filter port(mul, constport).is_fully_const() + + define varport (constport == \A ? \B : \A) + set mul_const SigSpec({port(mul, constport), SigSpec(State::S0, log2scale)}).as_const() + // get mul_din unmapped (so no `port()` shorthand) + // because we will be using it to set the \A port + // on the shift cell, and we want to stay close + // to the original design + set mul_din mul->getPort(varport) +endmatch + +code +{ + if (mul_const.empty() || GetSize(mul_const) > 20) + reject; + + // make sure there's no overlap in the signal + // selections by the shiftmul pattern + if (GetSize(port(shift, \Y)) > mul_const.as_int()) + if (!bmux_mode && !wshift_mode) + reject; + + int factor_bits = ceil_log2(mul_const.as_int()); + // make sure the multiplication never wraps around + if (GetSize(shift_amount) + log2scale < factor_bits + GetSize(mul_din)) + reject; + + did_something = true; + log("right shiftmul pattern in %s: shift=%s, mul=%s\n", log_id(module), log_id(shift), log_id(mul)); + + if (bmux_mode) { + SigSpec out = port(shift, \Y); + int shift_stride = mul_const.as_int(); + SigSpec shift_vector = port(shift, \A); + SigBit padbit = (shift->type != $shiftx) ? (param(shift, \A_SIGNED)).as_bool() ? + port(shift, \A).bits().back() : State::S0 : State::Sx; + + // Construct the input vector for the new bmux cell + SigSpec bmux_vector; + for (int i = 0; i * shift_stride < GetSize(shift_vector); i++) { + int validbits = std::min(GetSize(out), + GetSize(shift_vector) - i * shift_stride); + bmux_vector.append(shift_vector.extract(i * shift_stride, validbits)); + bmux_vector.append(SigSpec(padbit, GetSize(out) - validbits)); + } + + // Prep the select signal + SigSpec bmux_sel = mul_din; + bmux_sel.extend_u0(ceil_log2((GetSize(bmux_vector) + GetSize(out) - 1) / GetSize(out))); + + // Pad the vector + int vector_targetlen = GetSize(out) << GetSize(bmux_sel); + if (vector_targetlen > GetSize(bmux_vector)) + bmux_vector.append(SigSpec(padbit, vector_targetlen - GetSize(bmux_vector))); + else + bmux_vector.remove(vector_targetlen, GetSize(bmux_vector) - vector_targetlen); + + // Build the bmux, add an extra mux to handle top bits on the old select signal + SigSpec bmux_out, out_of_range; + std::string src = shift->get_src_attribute(); + bmux_out = module->Bmux(NEW_ID, bmux_vector, bmux_sel, src); + if (GetSize(mul_din) > GetSize(bmux_sel)) { + out_of_range = module->ReduceBool(NEW_ID, mul_din.extract_end(GetSize(bmux_sel)), false, src); + module->addMux(NEW_ID, bmux_out, SigSpec(padbit, GetSize(out)), out_of_range, out, src); + } else { + module->connect(out, bmux_out); + } + autoremove(shift); + accept; + } else if (wshift_mode) { + SigSpec out = port(shift, \Y); + SigSpec shift_vector = port(shift, \A); + int shift_stride = mul_const.as_int(); + for (int j = 0; j < shift_stride; j++) { + SigSpec slice_vector; + SigSpec slice_out; + for (int off = j; off < GetSize(shift_vector); off += shift_stride) + slice_vector.append(shift_vector.extract(off, 1)); + if (param(shift, \A_SIGNED).as_bool()) + slice_vector.append(shift_vector.bits().back()); + for (int off = j; off < GetSize(out); off += shift_stride) + slice_out.append(out.extract(off, 1)); + + if (slice_out.empty()) continue; + Cell *slice = module->addCell(NEW_ID, shift); + slice->setParam(\A_WIDTH, GetSize(slice_vector)); + slice->setPort(\A, slice_vector); + slice->setParam(\B_WIDTH, GetSize(mul_din)); + slice->setParam(\B_SIGNED, Const(1, 0)); + slice->setPort(\B, mul_din); + slice->setParam(\Y_WIDTH, GetSize(slice_out)); + slice->setPort(\Y, slice_out); + } + autoremove(shift); + accept; + } + + int const_factor = mul_const.as_int(); + int new_const_factor = 1 << factor_bits; + SigSpec padding(State::Sx, new_const_factor-const_factor); + SigSpec old_a = port(shift, \A), new_a; + int trunc = 0; + + if (GetSize(old_a) % const_factor != 0) { + trunc = const_factor - GetSize(old_a) % const_factor; + old_a.append(SigSpec(State::Sx, trunc)); + } + + for (int i = 0; i*const_factor < GetSize(old_a); i++) { + SigSpec slice = old_a.extract(i*const_factor, const_factor); + new_a.append(slice); + new_a.append(padding); + } + + if (trunc > 0) + new_a.remove(GetSize(new_a)-trunc, trunc); + + SigSpec new_b = {mul_din, SigSpec(State::S0, factor_bits)}; + if (param(shift, \B_SIGNED).as_bool()) + new_b.append(State::S0); + + shift->setPort(\A, new_a); + shift->setParam(\A_WIDTH, GetSize(new_a)); + shift->setPort(\B, new_b); + shift->setParam(\B_WIDTH, GetSize(new_b)); + + blacklist(shift); + accept; +} +endcode