diff --git a/docs/source/cell/word_arith.rst b/docs/source/cell/word_arith.rst index 49070814add..8e3daa27f30 100644 --- a/docs/source/cell/word_arith.rst +++ b/docs/source/cell/word_arith.rst @@ -1,7 +1,7 @@ Coarse arithmetics ------------------ -.. todo:: Add information about `$alu`, `$fa`, and `$lcu` cells. +.. todo:: Add information about `$alu`, `$fa`, `$macc_v2`, and `$lcu` cells. The `$macc` cell type represents a generalized multiply and accumulate operation. The cell is purely combinational. It outputs the result of summing up diff --git a/docs/source/code_examples/fifo/fifo.ys b/docs/source/code_examples/fifo/fifo.ys index 57a28e63e8e..e6b9bf69d4c 100644 --- a/docs/source/code_examples/fifo/fifo.ys +++ b/docs/source/code_examples/fifo/fifo.ys @@ -67,7 +67,7 @@ show -color maroon3 @new_cells -notitle -format dot -prefix rdata_memrdv2 o:rdat # ======================================================== alumacc -select -set new_cells t:$alu t:$macc +select -set new_cells t:$alu t:$macc_v2 show -color maroon3 @new_cells -notitle -format dot -prefix rdata_alumacc o:rdata %ci* # ======================================================== diff --git a/docs/source/getting_started/example_synth.rst b/docs/source/getting_started/example_synth.rst index 189eaddfada..e215586cc9c 100644 --- a/docs/source/getting_started/example_synth.rst +++ b/docs/source/getting_started/example_synth.rst @@ -523,7 +523,7 @@ That brings us to the fourth and final part for the iCE40 synthesis flow: :name: synth_coarse4 Where before each type of arithmetic operation had its own cell, e.g. `$add`, we -now want to extract these into `$alu` and `$macc` cells which can help identify +now want to extract these into `$alu` and `$macc_v2` cells which can help identify opportunities for reusing logic. We do this by running `alumacc`, which we can see produce the following changes in our example design: diff --git a/kernel/celledges.cc b/kernel/celledges.cc index bad7124d9b1..95f160612c9 100644 --- a/kernel/celledges.cc +++ b/kernel/celledges.cc @@ -453,7 +453,7 @@ bool YOSYS_NAMESPACE_PREFIX AbstractCellEdgesDatabase::add_edges_from_cell(RTLIL } // FIXME: $mul $div $mod $divfloor $modfloor $slice $concat - // FIXME: $lut $sop $alu $lcu $macc $fa + // FIXME: $lut $sop $alu $lcu $macc $macc_v2 $fa // FIXME: $mul $div $mod $divfloor $modfloor $pow $slice $concat $bweqx // FIXME: $lut $sop $alu $lcu $macc $fa $logic_and $logic_or $bwmux diff --git a/kernel/celltypes.h b/kernel/celltypes.h index 3167a9addf9..0ce5db54d46 100644 --- a/kernel/celltypes.h +++ b/kernel/celltypes.h @@ -144,6 +144,7 @@ struct CellTypes setup_type(ID($lcu), {ID::P, ID::G, ID::CI}, {ID::CO}, true); setup_type(ID($alu), {ID::A, ID::B, ID::CI, ID::BI}, {ID::X, ID::Y, ID::CO}, true); + setup_type(ID($macc_v2), {ID::A, ID::B, ID::C}, {ID::Y}, true); setup_type(ID($fa), {ID::A, ID::B, ID::C}, {ID::X, ID::Y}, true); } diff --git a/kernel/consteval.h b/kernel/consteval.h index 331d8f12854..05e94ab86a3 100644 --- a/kernel/consteval.h +++ b/kernel/consteval.h @@ -310,7 +310,7 @@ struct ConstEval } } } - else if (cell->type == ID($macc)) + else if (cell->type.in(ID($macc), ID($macc_v2))) { Macc macc; macc.from_cell(cell); diff --git a/kernel/constids.inc b/kernel/constids.inc index d68e2dfe646..4fdbb3dc84d 100644 --- a/kernel/constids.inc +++ b/kernel/constids.inc @@ -276,3 +276,11 @@ X(Y) X(Y_WIDTH) X(area) X(capacitance) +X(NPRODUCTS) +X(NADDENDS) +X(PRODUCT_NEGATED) +X(ADDEND_NEGATED) +X(A_WIDTHS) +X(B_WIDTHS) +X(C_WIDTHS) +X(C_SIGNED) diff --git a/kernel/macc.h b/kernel/macc.h index 55940769de2..5fc6036ed1e 100644 --- a/kernel/macc.h +++ b/kernel/macc.h @@ -82,7 +82,7 @@ struct Macc new_ports.swap(ports); } - void from_cell(RTLIL::Cell *cell) + void from_cell_v1(RTLIL::Cell *cell) { RTLIL::SigSpec port_a = cell->getPort(ID::A); @@ -136,52 +136,128 @@ struct Macc log_assert(port_a_cursor == GetSize(port_a)); } - void to_cell(RTLIL::Cell *cell) const + void from_cell(RTLIL::Cell *cell) { - RTLIL::SigSpec port_a; - std::vector config_bits; - int max_size = 0, num_bits = 0; - - for (auto &port : ports) { - max_size = max(max_size, GetSize(port.in_a)); - max_size = max(max_size, GetSize(port.in_b)); + if (cell->type == ID($macc)) { + from_cell_v1(cell); + return; } + log_assert(cell->type == ID($macc_v2)); - while (max_size) - num_bits++, max_size /= 2; + RTLIL::SigSpec port_a = cell->getPort(ID::A); + RTLIL::SigSpec port_b = cell->getPort(ID::B); + RTLIL::SigSpec port_c = cell->getPort(ID::C); - log_assert(num_bits < 16); - config_bits.push_back(num_bits & 1 ? State::S1 : State::S0); - config_bits.push_back(num_bits & 2 ? State::S1 : State::S0); - config_bits.push_back(num_bits & 4 ? State::S1 : State::S0); - config_bits.push_back(num_bits & 8 ? State::S1 : State::S0); + ports.clear(); - for (auto &port : ports) - { - if (GetSize(port.in_a) == 0) - continue; + int nproducts = cell->getParam(ID::NPRODUCTS).as_int(); + const Const &product_neg = cell->getParam(ID::PRODUCT_NEGATED); + const Const &a_widths = cell->getParam(ID::A_WIDTHS); + const Const &b_widths = cell->getParam(ID::B_WIDTHS); + const Const &a_signed = cell->getParam(ID::A_SIGNED); + const Const &b_signed = cell->getParam(ID::B_SIGNED); + int ai = 0, bi = 0; + for (int i = 0; i < nproducts; i++) { + port_t term; + + log_assert(a_signed[i] == b_signed[i]); + term.is_signed = (a_signed[i] == State::S1); + int a_width = a_widths.extract(16 * i, 16).as_int(false); + int b_width = b_widths.extract(16 * i, 16).as_int(false); + + term.in_a = port_a.extract(ai, a_width); + ai += a_width; + term.in_b = port_b.extract(bi, b_width); + bi += b_width; + term.do_subtract = (product_neg[i] == State::S1); + + ports.push_back(term); + } + log_assert(port_a.size() == ai); + log_assert(port_b.size() == bi); - config_bits.push_back(port.is_signed ? State::S1 : State::S0); - config_bits.push_back(port.do_subtract ? State::S1 : State::S0); + int naddends = cell->getParam(ID::NADDENDS).as_int(); + const Const &addend_neg = cell->getParam(ID::ADDEND_NEGATED); + const Const &c_widths = cell->getParam(ID::C_WIDTHS); + const Const &c_signed = cell->getParam(ID::C_SIGNED); + int ci = 0; + for (int i = 0; i < naddends; i++) { + port_t term; - int size_a = GetSize(port.in_a); - for (int i = 0; i < num_bits; i++) - config_bits.push_back(size_a & (1 << i) ? State::S1 : State::S0); + term.is_signed = (c_signed[i] == State::S1); + int c_width = c_widths.extract(16 * i, 16).as_int(false); - int size_b = GetSize(port.in_b); - for (int i = 0; i < num_bits; i++) - config_bits.push_back(size_b & (1 << i) ? State::S1 : State::S0); + term.in_a = port_c.extract(ci, c_width); + ci += c_width; + term.do_subtract = (addend_neg[i] == State::S1); + + ports.push_back(term); + } + log_assert(port_c.size() == ci); + } - port_a.append(port.in_a); - port_a.append(port.in_b); + void to_cell(RTLIL::Cell *cell) + { + cell->type = ID($macc_v2); + + int nproducts = 0, naddends = 0; + Const a_signed, b_signed, a_widths, b_widths, product_negated; + Const c_signed, c_widths, addend_negated; + SigSpec a, b, c; + + for (int i = 0; i < (int) ports.size(); i++) { + SigSpec term_a = ports[i].in_a, term_b = ports[i].in_b; + + if (term_b.empty()) { + // addend + c_widths.append(Const(term_a.size(), 16)); + c_signed.append(ports[i].is_signed ? RTLIL::S1 : RTLIL::S0); + addend_negated.append(ports[i].do_subtract ? RTLIL::S1 : RTLIL::S0); + c.append(term_a); + naddends++; + } else { + // product + a_widths.append(Const(term_a.size(), 16)); + b_widths.append(Const(term_b.size(), 16)); + a_signed.append(ports[i].is_signed ? RTLIL::S1 : RTLIL::S0); + b_signed.append(ports[i].is_signed ? RTLIL::S1 : RTLIL::S0); + product_negated.append(ports[i].do_subtract ? RTLIL::S1 : RTLIL::S0); + a.append(term_a); + b.append(term_b); + nproducts++; + } } - cell->setPort(ID::A, port_a); - cell->setPort(ID::B, {}); - cell->setParam(ID::CONFIG, config_bits); - cell->setParam(ID::CONFIG_WIDTH, GetSize(config_bits)); - cell->setParam(ID::A_WIDTH, GetSize(port_a)); - cell->setParam(ID::B_WIDTH, 0); + if (a_signed.empty()) + a_signed = {RTLIL::Sx}; + if (b_signed.empty()) + b_signed = {RTLIL::Sx}; + if (c_signed.empty()) + c_signed = {RTLIL::Sx}; + if (a_widths.empty()) + a_widths = {RTLIL::Sx}; + if (b_widths.empty()) + b_widths = {RTLIL::Sx}; + if (c_widths.empty()) + c_widths = {RTLIL::Sx}; + if (product_negated.empty()) + product_negated = {RTLIL::Sx}; + if (addend_negated.empty()) + addend_negated = {RTLIL::Sx}; + + cell->setParam(ID::NPRODUCTS, nproducts); + cell->setParam(ID::PRODUCT_NEGATED, product_negated); + cell->setParam(ID::NADDENDS, naddends); + cell->setParam(ID::ADDEND_NEGATED, addend_negated); + cell->setParam(ID::A_SIGNED, a_signed); + cell->setParam(ID::B_SIGNED, b_signed); + cell->setParam(ID::C_SIGNED, c_signed); + cell->setParam(ID::A_WIDTHS, a_widths); + cell->setParam(ID::B_WIDTHS, b_widths); + cell->setParam(ID::C_WIDTHS, c_widths); + cell->setPort(ID::A, a); + cell->setPort(ID::B, b); + cell->setPort(ID::C, c); } bool eval(RTLIL::Const &result) const diff --git a/kernel/rtlil.cc b/kernel/rtlil.cc index cb0f7da7807..3b9a4a8b11c 100644 --- a/kernel/rtlil.cc +++ b/kernel/rtlil.cc @@ -540,6 +540,12 @@ void RTLIL::Const::bitvectorize() const { } } +void RTLIL::Const::append(const RTLIL::Const &other) { + bitvectorize(); + bitvectype& bv = get_bits(); + bv.insert(bv.end(), other.begin(), other.end()); +} + RTLIL::State RTLIL::Const::const_iterator::operator*() const { if (auto bv = parent.get_if_bits()) return (*bv)[idx]; @@ -1461,6 +1467,40 @@ namespace { return; } + if (cell->type == ID($macc_v2)) { + if (param(ID::NPRODUCTS) < 0) + error(__LINE__); + if (param(ID::NADDENDS) < 0) + error(__LINE__); + param_bits(ID::PRODUCT_NEGATED, max(param(ID::NPRODUCTS), 1)); + param_bits(ID::ADDEND_NEGATED, max(param(ID::NADDENDS), 1)); + param_bits(ID::A_SIGNED, max(param(ID::NPRODUCTS), 1)); + param_bits(ID::B_SIGNED, max(param(ID::NPRODUCTS), 1)); + param_bits(ID::C_SIGNED, max(param(ID::NADDENDS), 1)); + if (cell->getParam(ID::A_SIGNED) != cell->getParam(ID::B_SIGNED)) + error(__LINE__); + param_bits(ID::A_WIDTHS, max(param(ID::NPRODUCTS) * 16, 1)); + param_bits(ID::B_WIDTHS, max(param(ID::NPRODUCTS) * 16, 1)); + param_bits(ID::C_WIDTHS, max(param(ID::NADDENDS) * 16, 1)); + const Const &a_width = cell->getParam(ID::A_WIDTHS); + const Const &b_width = cell->getParam(ID::B_WIDTHS); + const Const &c_width = cell->getParam(ID::C_WIDTHS); + int a_width_sum = 0, b_width_sum = 0, c_width_sum = 0; + for (int i = 0; i < param(ID::NPRODUCTS); i++) { + a_width_sum += a_width.extract(16 * i, 16).as_int(false); + b_width_sum += b_width.extract(16 * i, 16).as_int(false); + } + for (int i = 0; i < param(ID::NADDENDS); i++) { + c_width_sum += c_width.extract(16 * i, 16).as_int(false); + } + port(ID::A, a_width_sum); + port(ID::B, b_width_sum); + port(ID::C, c_width_sum); + port(ID::Y, param(ID::Y_WIDTH)); + check_expected(); + return; + } + if (cell->type == ID($logic_not)) { param_bool(ID::A_SIGNED); port(ID::A, param(ID::A_WIDTH)); @@ -4093,6 +4133,11 @@ void RTLIL::Cell::fixup_parameters(bool set_a_signed, bool set_b_signed) return; } + if (type == ID($macc_v2)) { + parameters[ID::Y_WIDTH] = GetSize(connections_[ID::Y]); + return; + } + bool signedness_ab = !type.in(ID($slice), ID($concat), ID($macc)); if (connections_.count(ID::A)) { diff --git a/kernel/rtlil.h b/kernel/rtlil.h index f9cacd151cd..33742cb701c 100644 --- a/kernel/rtlil.h +++ b/kernel/rtlil.h @@ -738,6 +738,8 @@ struct RTLIL::Const bool empty() const; void bitvectorize() const; + void append(const RTLIL::Const &other); + class const_iterator { private: const Const& parent; diff --git a/kernel/satgen.cc b/kernel/satgen.cc index dd15b51f3bd..9e5fa911155 100644 --- a/kernel/satgen.cc +++ b/kernel/satgen.cc @@ -740,7 +740,7 @@ bool SatGen::importCell(RTLIL::Cell *cell, int timestep) return true; } - if (cell->type == ID($macc)) + if (cell->type.in(ID($macc), ID($macc_v2))) { std::vector a = importDefSigSpec(cell->getPort(ID::A), timestep); std::vector y = importDefSigSpec(cell->getPort(ID::Y), timestep); diff --git a/passes/techmap/booth.cc b/passes/techmap/booth.cc index dce7da486f5..cd9012e6cd3 100644 --- a/passes/techmap/booth.cc +++ b/passes/techmap/booth.cc @@ -218,7 +218,7 @@ struct BoothPassWorker { log_assert(cell->getParam(ID::A_SIGNED).as_bool() == cell->getParam(ID::B_SIGNED).as_bool()); is_signed = cell->getParam(ID::A_SIGNED).as_bool(); - } else if (cell->type == ID($macc)) { + } else if (cell->type.in(ID($macc), ID($macc_v2))) { Macc macc; macc.from_cell(cell); diff --git a/passes/techmap/maccmap.cc b/passes/techmap/maccmap.cc index 911d66cfa1b..3dde9243803 100644 --- a/passes/techmap/maccmap.cc +++ b/passes/techmap/maccmap.cc @@ -403,7 +403,7 @@ struct MaccmapPass : public Pass { for (auto mod : design->selected_modules()) for (auto cell : mod->selected_cells()) - if (cell->type == ID($macc)) { + if (cell->type.in(ID($macc), ID($macc_v2))) { log("Mapping %s.%s (%s).\n", log_id(mod), log_id(cell), log_id(cell->type)); maccmap(mod, cell, unmap_mode); mod->remove(cell); diff --git a/passes/techmap/techmap.cc b/passes/techmap/techmap.cc index b77bab69eb0..95c733f6240 100644 --- a/passes/techmap/techmap.cc +++ b/passes/techmap/techmap.cc @@ -554,8 +554,8 @@ struct TechmapWorker if (extmapper_name == "maccmap") { log("Creating %s with maccmap.\n", log_id(extmapper_module)); - if (extmapper_cell->type != ID($macc)) - log_error("The maccmap mapper can only map $macc (not %s) cells!\n", log_id(extmapper_cell->type)); + if (!extmapper_cell->type.in(ID($macc), ID($macc_v2))) + log_error("The maccmap mapper can only map $macc/$macc_v2 (not %s) cells!\n", log_id(extmapper_cell->type)); maccmap(extmapper_module, extmapper_cell); extmapper_module->remove(extmapper_cell); } @@ -600,8 +600,8 @@ struct TechmapWorker } if (extmapper_name == "maccmap") { - if (cell->type != ID($macc)) - log_error("The maccmap mapper can only map $macc (not %s) cells!\n", log_id(cell->type)); + if (!cell->type.in(ID($macc), ID($macc_v2))) + log_error("The maccmap mapper can only map $macc/$macc_v2 (not %s) cells!\n", log_id(cell->type)); maccmap(module, cell); } diff --git a/techlibs/common/simlib.v b/techlibs/common/simlib.v index df9e1feb1e4..028e4e5a9e4 100644 --- a/techlibs/common/simlib.v +++ b/techlibs/common/simlib.v @@ -1207,6 +1207,120 @@ end endmodule +// -------------------------------------------------------- +// |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---| +//- +//- $macc_v2 (A, B, C, Y) +//* group arith +//- +//- Multiply and add. +//- This cell represents a generic fused multiply-add operation, it supersedes the +//- earlier $macc cell. +//- +module \$macc_v2 (A, B, C, Y); + +parameter NPRODUCTS = 0; +parameter NADDENDS = 0; +parameter A_WIDTHS = 16'h0000; +parameter B_WIDTHS = 16'h0000; +parameter C_WIDTHS = 16'h0000; +parameter Y_WIDTH = 0; + +parameter PRODUCT_NEGATED = 1'bx; +parameter ADDEND_NEGATED = 1'bx; +parameter A_SIGNED = 1'bx; +parameter B_SIGNED = 1'bx; +parameter C_SIGNED = 1'bx; + +function integer sum_widths1; + input [(16*NPRODUCTS)-1:0] widths; + integer i; + begin + sum_widths1 = 0; + for (i = 0; i < NPRODUCTS; i++) begin + sum_widths1 = sum_widths1 + widths[16*i+:16]; + end + end +endfunction + +function integer sum_widths2; + input [(16*NADDENDS)-1:0] widths; + integer i; + begin + sum_widths2 = 0; + for (i = 0; i < NADDENDS; i++) begin + sum_widths2 = sum_widths2 + widths[16*i+:16]; + end + end +endfunction + +input [sum_widths1(A_WIDTHS)-1:0] A; // concatenation of LHS factors +input [sum_widths1(B_WIDTHS)-1:0] B; // concatenation of RHS factors +input [sum_widths2(C_WIDTHS)-1:0] C; // concatenation of summands +output reg [Y_WIDTH-1:0] Y; // output sum + +integer i, j, ai, bi, ci, aw, bw, cw; +reg [Y_WIDTH-1:0] product; +reg [Y_WIDTH-1:0] addend, oper_a, oper_b; + +always @* begin + Y = 0; + ai = 0; + bi = 0; + for (i = 0; i < NPRODUCTS; i = i+1) + begin + aw = A_WIDTHS[16*i+:16]; + bw = B_WIDTHS[16*i+:16]; + + oper_a = 0; + oper_b = 0; + for (j = 0; j < Y_WIDTH && j < aw; j = j + 1) + oper_a[j] = A[ai + j]; + for (j = 0; j < Y_WIDTH && j < bw; j = j + 1) + oper_b[j] = B[bi + j]; + // A_SIGNED[i] == B_SIGNED[i] as RTLIL invariant + if (A_SIGNED[i] && B_SIGNED[i]) begin + for (j = aw; j > 0 && j < Y_WIDTH; j = j + 1) + oper_a[j] = oper_a[j - 1]; + for (j = bw; j > 0 && j < Y_WIDTH; j = j + 1) + oper_b[j] = oper_b[j - 1]; + end + + product = oper_a * oper_b; + + if (PRODUCT_NEGATED[i]) + Y = Y - product; + else + Y = Y + product; + + ai = ai + aw; + bi = bi + bw; + end + + ci = 0; + for (i = 0; i < NADDENDS; i = i+1) + begin + cw = C_WIDTHS[16*i+:16]; + + addend = 0; + for (j = 0; j < Y_WIDTH && j < cw; j = j + 1) + addend[j] = C[ci + j]; + if (C_SIGNED[i]) begin + for (j = cw; j > 0 && j < Y_WIDTH; j = j + 1) + addend[j] = addend[j - 1]; + end + + if (ADDEND_NEGATED[i]) + Y = Y - addend; + else + Y = Y + addend; + + ci = ci + cw; + end +end + +endmodule + // -------------------------------------------------------- //* ver 2 //* title Divider diff --git a/techlibs/common/techmap.v b/techlibs/common/techmap.v index 11929614763..75651bb791b 100644 --- a/techlibs/common/techmap.v +++ b/techlibs/common/techmap.v @@ -290,7 +290,7 @@ module _90_alu (A, B, CI, BI, X, Y, CO); endmodule (* techmap_maccmap *) -(* techmap_celltype = "$macc" *) +(* techmap_celltype = "$macc $macc_v2" *) module _90_macc; endmodule diff --git a/tests/alumacc/basic.ys b/tests/alumacc/basic.ys new file mode 100644 index 00000000000..cdd20c47200 --- /dev/null +++ b/tests/alumacc/basic.ys @@ -0,0 +1,61 @@ +read_verilog <