From 22929846f5128d7102be32175b327e8480d2486a Mon Sep 17 00:00:00 2001 From: Dag Lem Date: Fri, 4 Aug 2023 23:45:47 +0200 Subject: [PATCH] Correction and optimization of nowrshmsk This makes tests/verilog/dynamic_range_lhs.v pass, after ensuring that nowrshmsk is actually tested. Stride is extracted from indexing of two-dimensional packed arrays and variable slices of the form dst[i*stride +: width] = src, and is used to optimize the generated CASE block. --- frontends/ast/simplify.cc | 98 +++++++++++++++++++++++-------- tests/verilog/dynamic_range_lhs.v | 2 +- 2 files changed, 76 insertions(+), 24 deletions(-) diff --git a/frontends/ast/simplify.cc b/frontends/ast/simplify.cc index 64191cd7ebf..78f52104834 100644 --- a/frontends/ast/simplify.cc +++ b/frontends/ast/simplify.cc @@ -35,6 +35,9 @@ #include #include #include +// For std::gcd in C++17 +// #include +#include YOSYS_NAMESPACE_BEGIN @@ -2824,27 +2827,12 @@ bool AstNode::simplify(bool const_fold, bool in_lvalue, int stage, int width_hin if (!children[0]->id2ast->range_valid) goto skip_dynamic_range_lvalue_expansion; - int source_width = children[0]->id2ast->range_left - children[0]->id2ast->range_right + 1; + AST::AstNode *member_node = get_struct_member(children[0]); + int source_width = member_node ? + member_node->range_left - member_node->range_right + 1 : + children[0]->id2ast->range_left - children[0]->id2ast->range_right + 1; int source_offset = children[0]->id2ast->range_right; int result_width = 1; - int stride = 1; - AST::AstNode *member_node = get_struct_member(children[0]); - if (member_node) { - // Clamp chunk to range of member within struct/union. - log_assert(!source_offset && !children[0]->id2ast->range_swapped); - source_width = member_node->range_left - member_node->range_right + 1; - - // When the (* nowrshmsk *) attribute is set, a CASE block is generated below - // to select the indexed bit slice. 
When a multirange array is indexed, the - // start of each possible slice is separated by the bit stride of the last - // index dimension, and we can optimize the CASE block accordingly. - // The dimension of the original array expression is saved in the 'integer' field. - int dims = children[0]->integer; - stride = source_width; - for (int dim = 0; dim < dims; dim++) { - stride /= get_struct_range_width(member_node, dim); - } - } AstNode *shift_expr = NULL; AstNode *range = children[0]->children[0]; @@ -2875,12 +2863,76 @@ bool AstNode::simplify(bool const_fold, bool in_lvalue, int stage, int width_hin { // big case block + int stride = 1; + + int case_width_hint; + bool case_sign_hint; + shift_expr->detectSignWidth(case_width_hint, case_sign_hint); + + // Extract (index)*(width) from non_opt_range pattern ((@selfsz@((index)*(width)))+(0)). + AstNode *lsb_expr = + shift_expr->type == AST_ADD && shift_expr->children[0]->type == AST_SELFSZ && + shift_expr->children[1]->type == AST_CONSTANT && shift_expr->children[1]->integer == 0 ? + shift_expr->children[0]->children[0] : + shift_expr; + + // Optimization: Extract stride from indexing of two-dimensional packed arrays and + // variable slices on the form dst[i*stride +: width] = src. + if (lsb_expr->type == AST_MUL && + (lsb_expr->children[0]->type == AST_CONSTANT || + lsb_expr->children[1]->type == AST_CONSTANT)) + { + int const_i = lsb_expr->children[1]->type == AST_CONSTANT; + stride = (int)lsb_expr->children[const_i]->integer; + } + else if (member_node) // Member in packed struct/union + { + // Clamp chunk to range of member within struct/union. + log_assert(!source_offset && !children[0]->id2ast->range_swapped); + + // When the (* nowrshmsk *) attribute is set, a CASE block is generated below + // to select the indexed bit slice. 
When a multirange array is indexed, the + // start of each possible slice is separated by the bit stride of the last + // index dimension, and we can optimize the CASE block accordingly. + // The dimension of the original array expression is saved in the 'integer' field. + int dims = children[0]->integer; + stride = source_width; + for (int dim = 0; dim < dims; dim++) { + stride /= get_struct_range_width(member_node, dim); + } + } + + // For (truncated) i*stride to be within the range of dst, the following must hold: + // i*stride ≡ bitno (mod shift_mod), i.e. + // i*stride = k*shift_mod + bitno + // + // The Diophantine equation on the form ax + by = c: + // stride*i - shift_mod*k = bitno + // has solutions iff c is a multiple of d = gcd(a, b), i.e. + // bitno mod gcd(stride, shift_mod) = 0 + // + // long long is at least 64 bits in C++11 + int max_bits = case_width_hint; + long long max_offset = (1ll << (max_bits - case_sign_hint)) - 1; + long long min_offset = case_sign_hint ? -(1ll << (max_bits - 1)) : 0; + long long shift_mod = 1ll << (case_width_hint - case_sign_hint); + // std::gcd requires C++17 + // long long bitno_div = std::gcd(stride, shift_mod); + long long bitno_div = boost::math::gcd((long long)stride, shift_mod); + did_something = true; newNode = new AstNode(AST_CASE, shift_expr); - for (int i = 0; i < source_width; i += stride) { + for (int i = 1 - result_width; i < source_width; i++) { + // Out of range indexes are handled in genrtlil.cc int start_bit = source_offset + i; - int end_bit = std::min(start_bit+result_width,source_width) - 1; - AstNode *cond = new AstNode(AST_COND, mkconst_int(start_bit, true)); + int end_bit = start_bit + result_width - 1; + // Check whether the current index can be generated by shift_expr. 
+ if (start_bit < min_offset || start_bit > max_offset) + continue; + if (start_bit%bitno_div != 0 || (stride == 0 && start_bit != 0)) + continue; + + AstNode *cond = new AstNode(AST_COND, mkconst_int(start_bit, case_sign_hint, case_width_hint)); AstNode *lvalue = children[0]->clone(); lvalue->delete_children(); if (member_node) @@ -2893,7 +2945,7 @@ bool AstNode::simplify(bool const_fold, bool in_lvalue, int stage, int width_hin } else { - // mask and shift operations, disabled for now + // mask and shift operations AstNode *wire_mask = new AstNode(AST_WIRE, new AstNode(AST_RANGE, mkconst_int(source_width-1, true), mkconst_int(0, true))); wire_mask->str = stringf("$bitselwrite$mask$%s:%d$%d", RTLIL::encode_filename(filename).c_str(), location.first_line, autoidx++); diff --git a/tests/verilog/dynamic_range_lhs.v b/tests/verilog/dynamic_range_lhs.v index ae291374db9..56fe3ef3b32 100644 --- a/tests/verilog/dynamic_range_lhs.v +++ b/tests/verilog/dynamic_range_lhs.v @@ -1,6 +1,6 @@ module gate( - output reg [`LEFT:`RIGHT] out_u, out_s, (* nowrshmsk = `ALT *) + output reg [`LEFT:`RIGHT] out_u, out_s, input wire data, input wire [1:0] sel1, sel2 );