From 2cab4ff17368156a73d9357f1ec5bd39d75ebdd6 Mon Sep 17 00:00:00 2001 From: Dag Lem Date: Fri, 4 Aug 2023 23:45:47 +0200 Subject: [PATCH 1/4] Correction and optimization of nowrshmsk This makes tests/verilog/dynamic_range_lhs.v pass, after ensuring that nowrshmsk is actually tested. Stride is extracted from indexing of two-dimensional packed arrays and variable slices on the form dst[i*stride +: width] = src, and is used to optimize the generated CASE block. Also uses less confusing variable names for indexing of lhs wires. --- frontends/ast/simplify.cc | 156 +++++++++++++++++++++--------- tests/verilog/dynamic_range_lhs.v | 2 +- 2 files changed, 113 insertions(+), 45 deletions(-) diff --git a/frontends/ast/simplify.cc b/frontends/ast/simplify.cc index dfa1ed6af03..98a922ff472 100644 --- a/frontends/ast/simplify.cc +++ b/frontends/ast/simplify.cc @@ -35,12 +35,25 @@ #include #include #include +// For std::gcd in C++17 +// #include YOSYS_NAMESPACE_BEGIN using namespace AST; using namespace AST_INTERNAL; +// gcd computed by Euclidian division. +// To be replaced by C++17 std::gcd +template I gcd(I a, I b) { + while (b != 0) { + I tmp = b; + b = a%b; + a = tmp; + } + return std::abs(a); +} + void AstNode::set_in_lvalue_flag(bool flag, bool no_descend) { if (flag != in_lvalue_from_above) { @@ -2818,27 +2831,12 @@ bool AstNode::simplify(bool const_fold, int stage, int width_hint, bool sign_hin if (!children[0]->id2ast->range_valid) goto skip_dynamic_range_lvalue_expansion; - int source_width = children[0]->id2ast->range_left - children[0]->id2ast->range_right + 1; - int source_offset = children[0]->id2ast->range_right; - int result_width = 1; - int stride = 1; AST::AstNode *member_node = get_struct_member(children[0]); - if (member_node) { - // Clamp chunk to range of member within struct/union. - log_assert(!source_offset && !children[0]->id2ast->range_swapped); - source_width = member_node->range_left - member_node->range_right + 1; - - // When the (* nowrshmsk *) attribute is set, a CASE block is generated below - // to select the indexed bit slice. When a multirange array is indexed, the - // start of each possible slice is separated by the bit stride of the last - // index dimension, and we can optimize the CASE block accordingly. - // The dimension of the original array expression is saved in the 'integer' field. - int dims = children[0]->integer; - stride = source_width; - for (int dim = 0; dim < dims; dim++) { - stride /= get_struct_range_width(member_node, dim); - } - } + int wire_width = member_node ? + member_node->range_left - member_node->range_right + 1 : + children[0]->id2ast->range_left - children[0]->id2ast->range_right + 1; + int wire_offset = children[0]->id2ast->range_right; + int result_width = 1; AstNode *shift_expr = NULL; AstNode *range = children[0]->children[0]; @@ -2851,30 +2849,102 @@ bool AstNode::simplify(bool const_fold, int stage, int width_hint, bool sign_hin else shift_expr = range->children[0]->clone(); - bool use_case_method = false; - - if (children[0]->id2ast->attributes.count(ID::nowrshmsk)) { - AstNode *node = children[0]->id2ast->attributes.at(ID::nowrshmsk); - while (node->simplify(true, stage, -1, false)) { } - if (node->type != AST_CONSTANT) - input_error("Non-constant value for `nowrshmsk' attribute on `%s'!\n", children[0]->id2ast->str.c_str()); - if (node->asAttrConst().as_bool()) - use_case_method = true; - } + bool use_case_method = children[0]->id2ast->get_bool_attribute(ID::nowrshmsk); if (!use_case_method && current_always->detect_latch(children[0]->str)) use_case_method = true; - if (use_case_method) - { + if (use_case_method) { // big case block + int stride = 1; + long long bitno_div = stride; + + int case_width_hint; + bool case_sign_hint; + shift_expr->detectSignWidth(case_width_hint, case_sign_hint); + int max_width = case_width_hint; + + if (member_node) { // Member in packed struct/union + // Clamp chunk to range of member within struct/union. + log_assert(!wire_offset && !children[0]->id2ast->range_swapped); + + // When the (* nowrshmsk *) attribute is set, a CASE block is generated below + // to select the indexed bit slice. When a multirange array is indexed, the + // start of each possible slice is separated by the bit stride of the last + // index dimension, and we can optimize the CASE block accordingly. + // The dimension of the original array expression is saved in the 'integer' field. + int dims = children[0]->integer; + stride = wire_width; + for (int dim = 0; dim < dims; dim++) { + stride /= get_struct_range_width(member_node, dim); + } + bitno_div = stride; + } else { + // Extract (index)*(width) from non_opt_range pattern ((@selfsz@((index)*(width)))+(0)). + AstNode *lsb_expr = + shift_expr->type == AST_ADD && shift_expr->children[0]->type == AST_SELFSZ && + shift_expr->children[1]->type == AST_CONSTANT && shift_expr->children[1]->integer == 0 ? + shift_expr->children[0]->children[0] : + shift_expr; + + // Extract stride from indexing of two-dimensional packed arrays and + // variable slices on the form dst[i*stride +: width] = src. + if (lsb_expr->type == AST_MUL && + (lsb_expr->children[0]->type == AST_CONSTANT || + lsb_expr->children[1]->type == AST_CONSTANT)) + { + int stride_ix = lsb_expr->children[1]->type == AST_CONSTANT; + stride = (int)lsb_expr->children[stride_ix]->integer; + bitno_div = stride != 0 ? stride : 1; + + // Check whether i*stride can overflow. + int i_width; + bool i_sign; + lsb_expr->children[1 - stride_ix]->detectSignWidth(i_width, i_sign); + int stride_width; + bool stride_sign; + lsb_expr->children[stride_ix]->detectSignWidth(stride_width, stride_sign); + max_width = std::max(i_width, stride_width); + // Stride width calculated from actual stride value. + stride_width = std::ceil(std::log2(std::abs(stride))); + + if (i_width + stride_width > max_width) { + // For (truncated) i*stride to be within the range of dst, the following must hold: + // i*stride ≡ bitno (mod shift_mod), i.e. + // i*stride = k*shift_mod + bitno + // + // The Diophantine equation on the form ax + by = c: + // stride*i - shift_mod*k = bitno + // has solutions iff c is a multiple of d = gcd(a, b), i.e. + // bitno mod gcd(stride, shift_mod) = 0 + // + // long long is at least 64 bits in C++11 + long long shift_mod = 1ll << (max_width - case_sign_hint); + // std::gcd requires C++17 + // bitno_div = std::gcd(stride, shift_mod); + bitno_div = gcd((long long)stride, shift_mod); + } + } + } + + // long long is at least 64 bits in C++11 + long long max_offset = (1ll << (max_width - case_sign_hint)) - 1; + long long min_offset = case_sign_hint ? -(1ll << (max_width - 1)) : 0; + did_something = true; newNode = new AstNode(AST_CASE, shift_expr); - for (int i = 0; i < source_width; i += stride) { - int start_bit = source_offset + i; - int end_bit = std::min(start_bit+result_width,source_width) - 1; - AstNode *cond = new AstNode(AST_COND, mkconst_int(start_bit, true)); + for (int i = 1 - result_width; i < wire_width; i++) { + // Out of range indexes are handled in genrtlil.cc + int start_bit = wire_offset + i; + int end_bit = start_bit + result_width - 1; + // Check whether the current index can be generated by shift_expr. + if (start_bit < min_offset || start_bit > max_offset) + continue; + if (start_bit%bitno_div != 0 || (stride == 0 && start_bit != 0)) + continue; + + AstNode *cond = new AstNode(AST_COND, mkconst_int(start_bit, case_sign_hint, max_width)); AstNode *lvalue = children[0]->clone(); lvalue->delete_children(); if (member_node) @@ -2884,19 +2954,17 @@ bool AstNode::simplify(bool const_fold, int stage, int width_hint, bool sign_hin cond->children.push_back(new AstNode(AST_BLOCK, new AstNode(type, lvalue, children[1]->clone()))); newNode->children.push_back(cond); } - } - else - { - // mask and shift operations, disabled for now + } else { + // mask and shift operations - AstNode *wire_mask = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(source_width-1, true), mkconst_int(0, true))); + AstNode *wire_mask = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(wire_width-1, true), mkconst_int(0, true))); wire_mask->str = stringf("$bitselwrite$mask$%s:%d$%d", RTLIL::encode_filename(filename).c_str(), location.first_line, autoidx++); wire_mask->set_attribute(ID::nosync, AstNode::mkconst_int(1, false)); wire_mask->is_logic = true; while (wire_mask->simplify(true, 1, -1, false)) { } current_ast_mod->children.push_back(wire_mask); - AstNode *wire_data = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(source_width-1, true), mkconst_int(0, true))); + AstNode *wire_data = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(wire_width-1, true), mkconst_int(0, true))); wire_data->str = stringf("$bitselwrite$data$%s:%d$%d", RTLIL::encode_filename(filename).c_str(), location.first_line, autoidx++); wire_data->set_attribute(ID::nosync, AstNode::mkconst_int(1, false)); wire_data->is_logic = true; @@ -2952,12 +3020,12 @@ bool AstNode::simplify(bool const_fold, int stage, int width_hint, bool sign_hin shamt = new AstNode(AST_TO_SIGNED, shamt); // offset the shift amount by the lower bound of the dimension - int start_bit = source_offset; + int start_bit = wire_offset; shamt = new AstNode(AST_SUB, shamt, mkconst_int(start_bit, true)); // reflect the shift amount if the dimension is swapped if (children[0]->id2ast->range_swapped) - shamt = new AstNode(AST_SUB, mkconst_int(source_width - result_width, true), shamt); + shamt = new AstNode(AST_SUB, mkconst_int(wire_width - result_width, true), shamt); // AST_SHIFT uses negative amounts for shifting left shamt = new AstNode(AST_NEG, shamt); diff --git a/tests/verilog/dynamic_range_lhs.v b/tests/verilog/dynamic_range_lhs.v index ae291374db9..56fe3ef3b32 100644 --- a/tests/verilog/dynamic_range_lhs.v +++ b/tests/verilog/dynamic_range_lhs.v @@ -1,6 +1,6 @@ module gate( - output reg [`LEFT:`RIGHT] out_u, out_s, (* nowrshmsk = `ALT *) + output reg [`LEFT:`RIGHT] out_u, out_s, input wire data, input wire [1:0] sel1, sel2 ); From a105d2c050eda59a2bbc7af14190ed2b050dc061 Mon Sep 17 00:00:00 2001 From: Dag Lem Date: Wed, 22 Nov 2023 06:52:04 +0100 Subject: [PATCH 2/4] Add torture test for (* nowrshmsk *) stride optimization --- tests/various/dynamic_part_select.ys | 18 +++++++++++++++++ .../forloop_select_nowrshmsk.v | 20 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 tests/various/dynamic_part_select/forloop_select_nowrshmsk.v diff --git a/tests/various/dynamic_part_select.ys b/tests/various/dynamic_part_select.ys index 2dc061e899b..9e303b9db2e 100644 --- a/tests/various/dynamic_part_select.ys +++ b/tests/various/dynamic_part_select.ys @@ -69,6 +69,24 @@ design -copy-from gate -as gate gate miter -equiv -make_assert -make_outcmp -flatten gold gate equiv sat -prove-asserts -seq 10 -show-public -verify -set-init-zero equiv +### For-loop select, one dynamic input, (* nowrshmsk *) +design -reset +read_verilog ./dynamic_part_select/forloop_select_nowrshmsk.v +proc +rename -top gold +design -stash gold + +read_verilog ./dynamic_part_select/forloop_select_gate.v +proc +rename -top gate +design -stash gate + +design -copy-from gold -as gold gold +design -copy-from gate -as gate gate + +miter -equiv -make_assert -make_outcmp -flatten gold gate equiv +sat -prove-asserts -seq 10 -show-public -verify -set-init-zero equiv + #### Double loop (part-select, reset) ### design -reset read_verilog ./dynamic_part_select/reset_test.v diff --git a/tests/various/dynamic_part_select/forloop_select_nowrshmsk.v b/tests/various/dynamic_part_select/forloop_select_nowrshmsk.v new file mode 100644 index 00000000000..75415c3130e --- /dev/null +++ b/tests/various/dynamic_part_select/forloop_select_nowrshmsk.v @@ -0,0 +1,20 @@ +`default_nettype none +module forloop_select #(parameter WIDTH=16, SELW=4, CTRLW=$clog2(WIDTH), DINW=2**SELW) + (input wire clk, + input wire [CTRLW-1:0] ctrl, + input wire [DINW-1:0] din, + input wire en, + (* nowrshmsk *) + output reg [WIDTH-1:0] dout); + + reg [SELW:0] sel; + localparam SLICE = WIDTH/(SELW**2); + + always @(posedge clk) + begin + if (en) begin + for (sel = 0; sel <= 4'hf; sel=sel+1'b1) + dout[(ctrl*sel)+:SLICE] <= din; + end + end +endmodule From dbec704b49aebd8f1ac5ff9ed36b357cd9b99973 Mon Sep 17 00:00:00 2001 From: Dag Lem Date: Thu, 7 Dec 2023 13:45:56 +0100 Subject: [PATCH 3/4] Include x bits in test of lhs dynamic part-select --- tests/verilog/dynamic_range_lhs.sh | 2 +- tests/verilog/dynamic_range_lhs.v | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/verilog/dynamic_range_lhs.sh b/tests/verilog/dynamic_range_lhs.sh index 77b4a291834..f36c74bd2c8 100755 --- a/tests/verilog/dynamic_range_lhs.sh +++ b/tests/verilog/dynamic_range_lhs.sh @@ -15,7 +15,7 @@ run() { -p "read_verilog dynamic_range_lhs.v" \ -p "proc" \ -p "equiv_make gold gate equiv" \ - -p "equiv_simple" \ + -p "equiv_simple -undef" \ -p "equiv_status -assert" } diff --git a/tests/verilog/dynamic_range_lhs.v b/tests/verilog/dynamic_range_lhs.v index 56fe3ef3b32..6eb95216571 100644 --- a/tests/verilog/dynamic_range_lhs.v +++ b/tests/verilog/dynamic_range_lhs.v @@ -5,8 +5,8 @@ module gate( input wire [1:0] sel1, sel2 ); always @* begin - out_u = 0; - out_s = 0; + out_u = 'x; + out_s = 'x; case (`SPAN) 1: begin out_u[sel1*sel2] = data; @@ -43,8 +43,8 @@ task set; out_s[b] = data; endtask always @* begin - out_u = 0; - out_s = 0; + out_u = 'x; + out_s = 'x; case (sel1*sel2) 2'b00: set(0, 0); 2'b01: set(1, 1); From 1a2b4759e85928d305f9b082d30f0b8a2fc46e0e Mon Sep 17 00:00:00 2001 From: Dag Lem Date: Fri, 8 Dec 2023 20:47:43 +0100 Subject: [PATCH 4/4] Assign from rvalue via temporary register in nowrshmsk CASE Avoid repeating complex rvalue expressions for each condition. --- frontends/ast/ast.cc | 19 +++++++++++++++++++ frontends/ast/ast.h | 3 +++ frontends/ast/simplify.cc | 16 +++++++++++++--- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/frontends/ast/ast.cc b/frontends/ast/ast.cc index 34e6249939e..4b2b7a8220d 100644 --- a/frontends/ast/ast.cc +++ b/frontends/ast/ast.cc @@ -850,6 +850,25 @@ AstNode *AstNode::mkconst_str(const std::string &str) return node; } +// create a temporary register +AstNode *AstNode::mktemp_logic(const std::string &name, AstNode *mod, bool nosync, int range_left, int range_right, bool is_signed) +{ + AstNode *wire = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(range_left, true), mkconst_int(range_right, true))); + wire->str = stringf("%s%s:%d$%d", name.c_str(), RTLIL::encode_filename(filename).c_str(), location.first_line, autoidx++); + if (nosync) + wire->set_attribute(ID::nosync, AstNode::mkconst_int(1, false)); + wire->is_signed = is_signed; + wire->is_logic = true; + mod->children.push_back(wire); + while (wire->simplify(true, 1, -1, false)) { } + + AstNode *ident = new AstNode(AST_IDENTIFIER); + ident->str = wire->str; + ident->id2ast = wire; + + return ident; +} + bool AstNode::bits_only_01() const { for (auto bit : bits) diff --git a/frontends/ast/ast.h b/frontends/ast/ast.h index f789e930b3e..97903d0a046 100644 --- a/frontends/ast/ast.h +++ b/frontends/ast/ast.h @@ -321,6 +321,9 @@ namespace AST static AstNode *mkconst_str(const std::vector &v); static AstNode *mkconst_str(const std::string &str); + // helper function to create an AST node for a temporary register + AstNode *mktemp_logic(const std::string &name, AstNode *mod, bool nosync, int range_left, int range_right, bool is_signed); + // helper function for creating sign-extended const objects RTLIL::Const bitsAsConst(int width, bool is_signed); RTLIL::Const bitsAsConst(int width = -1); diff --git a/frontends/ast/simplify.cc b/frontends/ast/simplify.cc index 98a922ff472..945f286a1f6 100644 --- a/frontends/ast/simplify.cc +++ b/frontends/ast/simplify.cc @@ -2932,8 +2932,18 @@ bool AstNode::simplify(bool const_fold, int stage, int width_hint, bool sign_hin long long max_offset = (1ll << (max_width - case_sign_hint)) - 1; long long min_offset = case_sign_hint ? -(1ll << (max_width - 1)) : 0; + // A temporary register holds the result of the (possibly complex) rvalue expression, + // avoiding repetition in each AST_COND below. + int rvalue_width; + bool rvalue_sign; + children[1]->detectSignWidth(rvalue_width, rvalue_sign); + AstNode *rvalue = mktemp_logic("$bitselwrite$rvalue$", current_ast_mod, true, rvalue_width - 1, 0, rvalue_sign); + AstNode *caseNode = new AstNode(AST_CASE, shift_expr); + newNode = new AstNode(AST_BLOCK, + new AstNode(AST_ASSIGN_EQ, rvalue, children[1]->clone()), + caseNode); + did_something = true; - newNode = new AstNode(AST_CASE, shift_expr); for (int i = 1 - result_width; i < wire_width; i++) { // Out of range indexes are handled in genrtlil.cc int start_bit = wire_offset + i; @@ -2951,8 +2961,8 @@ bool AstNode::simplify(bool const_fold, int stage, int width_hint, bool sign_hin lvalue->set_attribute(ID::wiretype, member_node->clone()); lvalue->children.push_back(new AstNode(AST_RANGE, mkconst_int(end_bit, true), mkconst_int(start_bit, true))); - cond->children.push_back(new AstNode(AST_BLOCK, new AstNode(type, lvalue, children[1]->clone()))); - newNode->children.push_back(cond); + cond->children.push_back(new AstNode(AST_BLOCK, new AstNode(type, lvalue, rvalue->clone()))); + caseNode->children.push_back(cond); } } else { // mask and shift operations