+
+ Line data Source code
+
+ 1 : /*
+ 2 : * yosys -- Yosys Open SYnthesis Suite
+ 3 : *
+ 4 : * Copyright (C) 2019-2020 whitequark <whitequark@whitequark.org>
+ 5 : *
+ 6 : * Permission to use, copy, modify, and/or distribute this software for any
+ 7 : * purpose with or without fee is hereby granted.
+ 8 : *
+ 9 : * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ 10 : * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ 11 : * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ 12 : * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ 13 : * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ 14 : * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ 15 : * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ 16 : *
+ 17 : */
+ 18 :
+ 19 : // This file is included by the designs generated with `write_cxxrtl`. It is not used in Yosys itself.
+ 20 : //
+ 21 : // The CXXRTL support library implements compile-time specialized arbitrary-width arithmetic, and provides
+ 22 : // composite lvalues made out of bit slices and concatenations of lvalues. This allows the `write_cxxrtl` pass
+ 23 : // to perform a straightforward translation of RTLIL structures to readable C++, relying on the C++ compiler
+ 24 : // to unwrap the abstraction and generate efficient code.
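//
// As a brief illustration (editorial sketch; not code emitted by `write_cxxrtl` itself), the primitives defined
// below are used in the following style, with all widths fixed at compile time:
//
//   cxxrtl::value<8> a{0x12u}, b{0x34u};
//   cxxrtl::value<8> sum = a.add(b);                            // 8-bit wrapping addition
//   sum.slice<3, 0>() = cxxrtl::value<4>{0xfu};                 // slices are assignable lvalues
//   cxxrtl::value<12> cat = a.concat(sum.slice<3, 0>()).val();  // concatenation, `a` in the most significant bits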
+ 25 :
+ 26 : #ifndef CXXRTL_H
+ 27 : #define CXXRTL_H
+ 28 :
+ 29 : #include <cstddef>
+ 30 : #include <cstdint>
+ 31 : #include <cstring>
+ 32 : #include <cassert>
+ 33 : #include <limits>
+ 34 : #include <type_traits>
+ 35 : #include <tuple>
+ 36 : #include <vector>
+ 37 : #include <map>
+ 38 : #include <algorithm>
+ 39 : #include <memory>
+ 40 : #include <functional>
+ 41 : #include <sstream>
+ 42 : #include <iostream>
+ 43 :
+ 44 : // `cxxrtl::debug_item` has to inherit from `cxxrtl_object` to satisfy strict aliasing requirements.
+ 45 : #include <cxxrtl/capi/cxxrtl_capi.h>
+ 46 :
+ 47 : #ifndef __has_attribute
+ 48 : # define __has_attribute(x) 0
+ 49 : #endif
+ 50 :
+ 51 : // CXXRTL essentially uses the C++ compiler as a hygienic macro engine that feeds an instruction selector.
+ 52 : // It generates a lot of specialized template functions with relatively large bodies that, when inlined
+ 53 : // into the caller and (for those with loops) unrolled, often expose many new optimization opportunities.
+ 54 : // Because of this, most of the CXXRTL runtime must always be inlined for best performance.
+ 55 : #if __has_attribute(always_inline)
+ 56 : #define CXXRTL_ALWAYS_INLINE inline __attribute__((__always_inline__))
+ 57 : #else
+ 58 : #define CXXRTL_ALWAYS_INLINE inline
+ 59 : #endif
+ 60 : // Conversely, some functions in the generated code are extremely large yet very cold, with both of these
+ 61 : // properties being extreme enough to confuse C++ compilers into spending pathological amounts of time
+ 62 : // on a futile (the code becomes worse) attempt to optimize the least important parts of code.
+ 63 : #if __has_attribute(optnone)
+ 64 : #define CXXRTL_EXTREMELY_COLD __attribute__((__optnone__))
+ 65 : #elif __has_attribute(optimize)
+ 66 : #define CXXRTL_EXTREMELY_COLD __attribute__((__optimize__(0)))
+ 67 : #else
+ 68 : #define CXXRTL_EXTREMELY_COLD
+ 69 : #endif
+ 70 :
+ 71 : // CXXRTL uses assert() to check for C++ contract violations (which may result in e.g. undefined behavior
+ 72 : // of the simulation code itself), and CXXRTL_ASSERT to check for RTL contract violations (which may at
+ 73 : // most result in undefined simulation results).
+ 74 : //
+ 75 : // Though by default, CXXRTL_ASSERT() expands to assert(), it may be overridden e.g. when integrating
+ 76 : // the simulation into another process that should survive violating RTL contracts.
+ 77 : #ifndef CXXRTL_ASSERT
+ 78 : #ifndef CXXRTL_NDEBUG
+ 79 : #define CXXRTL_ASSERT(x) assert(x)
+ 80 : #else
+ 81 : #define CXXRTL_ASSERT(x)
+ 82 : #endif
+ 83 : #endif
+ 84 :
+ 85 : namespace cxxrtl {
+ 86 :
+ 87 : // All arbitrary-width values in CXXRTL are backed by arrays of unsigned integers called chunks. The chunk size
+ 88 : // is the same regardless of the value width to simplify manipulating values via FFI interfaces, e.g. driving
+ 89 : // and introspecting the simulation in Python.
+ 90 : //
+ 91 : // It is practical to use chunk sizes between 32 bits and the platform register size, because when arithmetic on
+ 92 : // narrower integer types is legalized by the C++ compiler, it inserts code to clear the high bits of the register.
+ 93 : // However, (a) most of our operations do not change those bits in the first place because of invariants that are
+ 94 : // invisible to the compiler, and (b) we often operate on non-power-of-2 values and have to clear the high bits anyway.
+ 95 : // Therefore, using relatively wide chunks and clearing the high bits explicitly and only when we know they may be
+ 96 : // clobbered results in simpler generated code.
+ 97 : typedef uint32_t chunk_t;
+ 98 : typedef uint64_t wide_chunk_t;
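
// For example (editorial note), with the 32-bit `chunk_t` above, the `value<>` template defined below stores
// a 48-bit quantity in two chunks and keeps the unused high bits of the last chunk cleared as an invariant:
//
//   static_assert(value<48>::chunks == 2, "two 32-bit chunks");
//   static_assert(value<48>::msb_mask == 0xffff, "only 16 bits are in use in the most significant chunk");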
+ 99 :
+ 100 : template<typename T>
+ 101 : struct chunk_traits {
+ 102 : static_assert(std::is_integral<T>::value && std::is_unsigned<T>::value,
+ 103 : "chunk type must be an unsigned integral type");
+ 104 : using type = T;
+ 105 : static constexpr size_t bits = std::numeric_limits<T>::digits;
+ 106 : static constexpr T mask = std::numeric_limits<T>::max();
+ 107 : };
+ 108 :
+ 109 : template<class T>
+ 110 : struct expr_base;
+ 111 :
+ 112 : template<size_t Bits>
+ 113 : struct value : public expr_base<value<Bits>> {
+ 114 : static constexpr size_t bits = Bits;
+ 115 :
+ 116 : using chunk = chunk_traits<chunk_t>;
+ 117 : static constexpr chunk::type msb_mask = (Bits % chunk::bits == 0) ? chunk::mask
+ 118 : : chunk::mask >> (chunk::bits - (Bits % chunk::bits));
+ 119 :
+ 120 : static constexpr size_t chunks = (Bits + chunk::bits - 1) / chunk::bits;
+ 121 : chunk::type data[chunks] = {};
+ 122 :
+ 123 3 : value() = default;
+ 124 : template<typename... Init>
+ 125 : explicit constexpr value(Init ...init) : data{init...} {}
+ 126 :
+ 127 : value(const value<Bits> &) = default;
+ 128 : value<Bits> &operator=(const value<Bits> &) = default;
+ 129 :
+ 130 : value(value<Bits> &&) = default;
+ 131 : value<Bits> &operator=(value<Bits> &&) = default;
+ 132 :
+ 133 : // A (no-op) helper that forces the cast to value<>.
+ 134 : CXXRTL_ALWAYS_INLINE
+ 135 : const value<Bits> &val() const {
+ 136 : return *this;
+ 137 : }
+ 138 :
+ 139 : std::string str() const {
+ 140 : std::stringstream ss;
+ 141 : ss << *this;
+ 142 : return ss.str();
+ 143 : }
+ 144 :
+ 145 : // Conversion operations.
+ 146 : //
+ 147 : // These functions ensure that a conversion is never out of range, and should always be used, if at all
+ 148 : // possible, instead of direct manipulation of the `data` member. For very large types, .slice() and
+ 149 : // .concat() can be used to split them into more manageable parts.
+ 150 : template<class IntegerT, typename std::enable_if<!std::is_signed<IntegerT>::value, int>::type = 0>
+ 151 : CXXRTL_ALWAYS_INLINE
+ 152 : IntegerT get() const {
+ 153 : static_assert(std::numeric_limits<IntegerT>::is_integer && !std::numeric_limits<IntegerT>::is_signed,
+ 154 : "get<T>() requires T to be an unsigned integral type");
+ 155 : static_assert(std::numeric_limits<IntegerT>::digits >= Bits,
+ 156 : "get<T>() requires T to be at least as wide as the value is");
+ 157 : IntegerT result = 0;
+ 158 : for (size_t n = 0; n < chunks; n++)
+ 159 : result |= IntegerT(data[n]) << (n * chunk::bits);
+ 160 : return result;
+ 161 : }
+ 162 :
+ 163 : template<class IntegerT, typename std::enable_if<std::is_signed<IntegerT>::value, int>::type = 0>
+ 164 : CXXRTL_ALWAYS_INLINE
+ 165 : IntegerT get() const {
+ 166 : auto unsigned_result = get<typename std::make_unsigned<IntegerT>::type>();
+ 167 : IntegerT result;
+ 168 : memcpy(&result, &unsigned_result, sizeof(IntegerT));
+ 169 : return result;
+ 170 : }
+ 171 :
+ 172 : template<class IntegerT, typename std::enable_if<!std::is_signed<IntegerT>::value, int>::type = 0>
+ 173 : CXXRTL_ALWAYS_INLINE
+ 174 30 : void set(IntegerT value) {
+ 175 : static_assert(std::numeric_limits<IntegerT>::is_integer && !std::numeric_limits<IntegerT>::is_signed,
+ 176 : "set<T>() requires T to be an unsigned integral type");
+ 177 : static_assert(std::numeric_limits<IntegerT>::digits >= Bits,
+ 178 : "set<T>() requires the value to be at least as wide as T is");
+ 179 6 : for (size_t n = 0; n < chunks; n++)
+ 180 36 : data[n] = (value >> (n * chunk::bits)) & chunk::mask;
+ 181 : }
+ 182 :
+ 183 : template<class IntegerT, typename std::enable_if<std::is_signed<IntegerT>::value, int>::type = 0>
+ 184 : CXXRTL_ALWAYS_INLINE
+ 185 : void set(IntegerT value) {
+ 186 : typename std::make_unsigned<IntegerT>::type unsigned_value;
+ 187 : memcpy(&unsigned_value, &value, sizeof(IntegerT));
+ 188 : set(unsigned_value);
+ 189 : }
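
// For example (editorial sketch), round-tripping a native integer through a 16-bit value:
//
//   value<16> v;
//   v.set<uint16_t>(0xbeef);
//   uint16_t x = v.get<uint16_t>(); // x == 0xbeef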
+ 190 :
+ 191 : // Operations with compile-time parameters.
+ 192 : //
+ 193 : // These operations are used to implement slicing, concatenation, and blitting.
+ 194 : // The trunc, zext and sext operations add or remove most significant bits (i.e. on the left);
+ 195 : // the rtrunc and rzext operations add or remove least significant bits (i.e. on the right).
+ 196 : template<size_t NewBits>
+ 197 : CXXRTL_ALWAYS_INLINE
+ 198 : value<NewBits> trunc() const {
+ 199 : static_assert(NewBits <= Bits, "trunc() may not increase width");
+ 200 : value<NewBits> result;
+ 201 : for (size_t n = 0; n < result.chunks; n++)
+ 202 : result.data[n] = data[n];
+ 203 : result.data[result.chunks - 1] &= result.msb_mask;
+ 204 : return result;
+ 205 : }
+ 206 :
+ 207 : template<size_t NewBits>
+ 208 : CXXRTL_ALWAYS_INLINE
+ 209 33 : value<NewBits> zext() const {
+ 210 : static_assert(NewBits >= Bits, "zext() may not decrease width");
+ 211 : value<NewBits> result;
+ 212 33 : for (size_t n = 0; n < chunks; n++)
+ 213 33 : result.data[n] = data[n];
+ 214 : return result;
+ 215 : }
+ 216 :
+ 217 : template<size_t NewBits>
+ 218 : CXXRTL_ALWAYS_INLINE
+ 219 : value<NewBits> sext() const {
+ 220 : static_assert(NewBits >= Bits, "sext() may not decrease width");
+ 221 : value<NewBits> result;
+ 222 : for (size_t n = 0; n < chunks; n++)
+ 223 : result.data[n] = data[n];
+ 224 : if (is_neg()) {
+ 225 : result.data[chunks - 1] |= ~msb_mask;
+ 226 : for (size_t n = chunks; n < result.chunks; n++)
+ 227 : result.data[n] = chunk::mask;
+ 228 : result.data[result.chunks - 1] &= result.msb_mask;
+ 229 : }
+ 230 : return result;
+ 231 : }
+ 232 :
+ 233 : template<size_t NewBits>
+ 234 : CXXRTL_ALWAYS_INLINE
+ 235 : value<NewBits> rtrunc() const {
+ 236 : static_assert(NewBits <= Bits, "rtrunc() may not increase width");
+ 237 : value<NewBits> result;
+ 238 : constexpr size_t shift_chunks = (Bits - NewBits) / chunk::bits;
+ 239 : constexpr size_t shift_bits = (Bits - NewBits) % chunk::bits;
+ 240 : chunk::type carry = 0;
+ 241 : if (shift_chunks + result.chunks < chunks) {
+ 242 : carry = (shift_bits == 0) ? 0
+ 243 : : data[shift_chunks + result.chunks] << (chunk::bits - shift_bits);
+ 244 : }
+ 245 : for (size_t n = result.chunks; n > 0; n--) {
+ 246 : result.data[n - 1] = carry | (data[shift_chunks + n - 1] >> shift_bits);
+ 247 : carry = (shift_bits == 0) ? 0
+ 248 : : data[shift_chunks + n - 1] << (chunk::bits - shift_bits);
+ 249 : }
+ 250 : return result;
+ 251 : }
+ 252 :
+ 253 : template<size_t NewBits>
+ 254 : CXXRTL_ALWAYS_INLINE
+ 255 : value<NewBits> rzext() const {
+ 256 : static_assert(NewBits >= Bits, "rzext() may not decrease width");
+ 257 : value<NewBits> result;
+ 258 : constexpr size_t shift_chunks = (NewBits - Bits) / chunk::bits;
+ 259 : constexpr size_t shift_bits = (NewBits - Bits) % chunk::bits;
+ 260 : chunk::type carry = 0;
+ 261 : for (size_t n = 0; n < chunks; n++) {
+ 262 : result.data[shift_chunks + n] = (data[n] << shift_bits) | carry;
+ 263 : carry = (shift_bits == 0) ? 0
+ 264 : : data[n] >> (chunk::bits - shift_bits);
+ 265 : }
+ 266 : if (shift_chunks + chunks < result.chunks)
+ 267 : result.data[shift_chunks + chunks] = carry;
+ 268 : return result;
+ 269 : }
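
// For example (editorial sketch), the widening helpers above behave as follows for the 4-bit value 0b1001:
//
//   value<4> nib{0x9u};
//   nib.zext<8>()  == value<8>{0x09u}   // 0000_1001
//   nib.sext<8>()  == value<8>{0xf9u}   // 1111_1001 (the sign bit is replicated)
//   nib.rzext<8>() == value<8>{0x90u}   // 1001_0000 (zeros appended on the right)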
+ 270 :
+ 271 : // Bit blit operation, i.e. a partial read-modify-write.
+ 272 : template<size_t Stop, size_t Start>
+ 273 : CXXRTL_ALWAYS_INLINE
+ 274 : value<Bits> blit(const value<Stop - Start + 1> &source) const {
+ 275 : static_assert(Stop >= Start, "blit() may not reverse bit order");
+ 276 : constexpr chunk::type start_mask = ~(chunk::mask << (Start % chunk::bits));
+ 277 : constexpr chunk::type stop_mask = (Stop % chunk::bits + 1 == chunk::bits) ? 0
+ 278 : : (chunk::mask << (Stop % chunk::bits + 1));
+ 279 : value<Bits> masked = *this;
+ 280 : if (Start / chunk::bits == Stop / chunk::bits) {
+ 281 : masked.data[Start / chunk::bits] &= stop_mask | start_mask;
+ 282 : } else {
+ 283 : masked.data[Start / chunk::bits] &= start_mask;
+ 284 : for (size_t n = Start / chunk::bits + 1; n < Stop / chunk::bits; n++)
+ 285 : masked.data[n] = 0;
+ 286 : masked.data[Stop / chunk::bits] &= stop_mask;
+ 287 : }
+ 288 : value<Bits> shifted = source
+ 289 : .template rzext<Stop + 1>()
+ 290 : .template zext<Bits>();
+ 291 : return masked.bit_or(shifted);
+ 292 : }
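
// For example (editorial sketch), writing a 4-bit field into bits [5:2] of an 8-bit value:
//
//   value<8> v{0x00u};
//   value<8> w = v.blit<5, 2>(value<4>{0xfu}); // w == value<8>{0x3cu}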
+ 293 :
+ 294 : // Helpers for selecting an extending or truncating operation depending on whether the result is wider or narrower
+ 295 : // than the operand. In C++17 these can be replaced with `if constexpr`.
+ 296 : template<size_t NewBits, typename = void>
+ 297 : struct zext_cast {
+ 298 : CXXRTL_ALWAYS_INLINE
+ 299 33 : value<NewBits> operator()(const value<Bits> &val) {
+ 300 33 : return val.template zext<NewBits>();
+ 301 : }
+ 302 : };
+ 303 :
+ 304 : template<size_t NewBits>
+ 305 : struct zext_cast<NewBits, typename std::enable_if<(NewBits < Bits)>::type> {
+ 306 : CXXRTL_ALWAYS_INLINE
+ 307 : value<NewBits> operator()(const value<Bits> &val) {
+ 308 : return val.template trunc<NewBits>();
+ 309 : }
+ 310 : };
+ 311 :
+ 312 : template<size_t NewBits, typename = void>
+ 313 : struct sext_cast {
+ 314 : CXXRTL_ALWAYS_INLINE
+ 315 : value<NewBits> operator()(const value<Bits> &val) {
+ 316 : return val.template sext<NewBits>();
+ 317 : }
+ 318 : };
+ 319 :
+ 320 : template<size_t NewBits>
+ 321 : struct sext_cast<NewBits, typename std::enable_if<(NewBits < Bits)>::type> {
+ 322 : CXXRTL_ALWAYS_INLINE
+ 323 : value<NewBits> operator()(const value<Bits> &val) {
+ 324 : return val.template trunc<NewBits>();
+ 325 : }
+ 326 : };
+ 327 :
+ 328 : template<size_t NewBits>
+ 329 : CXXRTL_ALWAYS_INLINE
+ 330 33 : value<NewBits> zcast() const {
+ 331 33 : return zext_cast<NewBits>()(*this);
+ 332 : }
+ 333 :
+ 334 : template<size_t NewBits>
+ 335 : CXXRTL_ALWAYS_INLINE
+ 336 : value<NewBits> scast() const {
+ 337 : return sext_cast<NewBits>()(*this);
+ 338 : }
+ 339 :
+ 340 : // Bit replication is far more efficient than the equivalent concatenation.
+ 341 : template<size_t Count>
+ 342 : CXXRTL_ALWAYS_INLINE
+ 343 : value<Bits * Count> repeat() const {
+ 344 : static_assert(Bits == 1, "repeat() is implemented only for 1-bit values");
+ 345 : return *this ? value<Bits * Count>().bit_not() : value<Bits * Count>();
+ 346 : }
+ 347 :
+ 348 : // Operations with run-time parameters (offsets, amounts, etc).
+ 349 : //
+ 350 : // These operations are used for computations.
+ 351 : bool bit(size_t offset) const {
+ 352 : return data[offset / chunk::bits] & (1 << (offset % chunk::bits));
+ 353 : }
+ 354 :
+ 355 : void set_bit(size_t offset, bool value = true) {
+ 356 : size_t offset_chunks = offset / chunk::bits;
+ 357 : size_t offset_bits = offset % chunk::bits;
+ 358 : data[offset_chunks] &= ~(1 << offset_bits);
+ 359 : data[offset_chunks] |= value ? 1 << offset_bits : 0;
+ 360 : }
+ 361 :
+ 362 : explicit operator bool() const {
+ 363 : return !is_zero();
+ 364 : }
+ 365 :
+ 366 : bool is_zero() const {
+ 367 : for (size_t n = 0; n < chunks; n++)
+ 368 : if (data[n] != 0)
+ 369 : return false;
+ 370 : return true;
+ 371 : }
+ 372 :
+ 373 : bool is_neg() const {
+ 374 : return data[chunks - 1] & (1 << ((Bits - 1) % chunk::bits));
+ 375 : }
+ 376 :
+ 377 : bool operator ==(const value<Bits> &other) const {
+ 378 : for (size_t n = 0; n < chunks; n++)
+ 379 : if (data[n] != other.data[n])
+ 380 : return false;
+ 381 : return true;
+ 382 : }
+ 383 :
+ 384 : bool operator !=(const value<Bits> &other) const {
+ 385 : return !(*this == other);
+ 386 : }
+ 387 :
+ 388 : value<Bits> bit_not() const {
+ 389 : value<Bits> result;
+ 390 : for (size_t n = 0; n < chunks; n++)
+ 391 : result.data[n] = ~data[n];
+ 392 : result.data[chunks - 1] &= msb_mask;
+ 393 : return result;
+ 394 : }
+ 395 :
+ 396 : value<Bits> bit_and(const value<Bits> &other) const {
+ 397 : value<Bits> result;
+ 398 : for (size_t n = 0; n < chunks; n++)
+ 399 : result.data[n] = data[n] & other.data[n];
+ 400 : return result;
+ 401 : }
+ 402 :
+ 403 : value<Bits> bit_or(const value<Bits> &other) const {
+ 404 : value<Bits> result;
+ 405 : for (size_t n = 0; n < chunks; n++)
+ 406 : result.data[n] = data[n] | other.data[n];
+ 407 : return result;
+ 408 : }
+ 409 :
+ 410 : value<Bits> bit_xor(const value<Bits> &other) const {
+ 411 : value<Bits> result;
+ 412 : for (size_t n = 0; n < chunks; n++)
+ 413 : result.data[n] = data[n] ^ other.data[n];
+ 414 : return result;
+ 415 : }
+ 416 :
+ 417 : value<Bits> update(const value<Bits> &val, const value<Bits> &mask) const {
+ 418 : return bit_and(mask.bit_not()).bit_or(val.bit_and(mask));
+ 419 : }
+ 420 :
+ 421 : template<size_t AmountBits>
+ 422 : value<Bits> shl(const value<AmountBits> &amount) const {
+ 423 : // Ensure our early return is correct by prohibiting values larger than 4 Gbit.
+ 424 : static_assert(Bits <= chunk::mask, "shl() of unreasonably large values is not supported");
+ 425 : // Detect shifts definitely larger than Bits early.
+ 426 : for (size_t n = 1; n < amount.chunks; n++)
+ 427 : if (amount.data[n] != 0)
+ 428 : return {};
+ 429 : // Past this point we can use the least significant chunk as the shift size.
+ 430 : size_t shift_chunks = amount.data[0] / chunk::bits;
+ 431 : size_t shift_bits = amount.data[0] % chunk::bits;
+ 432 : if (shift_chunks >= chunks)
+ 433 : return {};
+ 434 : value<Bits> result;
+ 435 : chunk::type carry = 0;
+ 436 : for (size_t n = 0; n < chunks - shift_chunks; n++) {
+ 437 : result.data[shift_chunks + n] = (data[n] << shift_bits) | carry;
+ 438 : carry = (shift_bits == 0) ? 0
+ 439 : : data[n] >> (chunk::bits - shift_bits);
+ 440 : }
+ 441 : result.data[result.chunks - 1] &= result.msb_mask;
+ 442 : return result;
+ 443 : }
+ 444 :
+ 445 : template<size_t AmountBits, bool Signed = false>
+ 446 : value<Bits> shr(const value<AmountBits> &amount) const {
+ 447 : // Ensure our early return is correct by prohibiting values larger than 4 Gbit.
+ 448 : static_assert(Bits <= chunk::mask, "shr() of unreasonably large values is not supported");
+ 449 : // Detect shifts definitely larger than Bits early.
+ 450 : for (size_t n = 1; n < amount.chunks; n++)
+ 451 : if (amount.data[n] != 0)
+ 452 : return (Signed && is_neg()) ? value<Bits>().bit_not() : value<Bits>();
+ 453 : // Past this point we can use the least significant chunk as the shift size.
+ 454 : size_t shift_chunks = amount.data[0] / chunk::bits;
+ 455 : size_t shift_bits = amount.data[0] % chunk::bits;
+ 456 : if (shift_chunks >= chunks)
+ 457 : return (Signed && is_neg()) ? value<Bits>().bit_not() : value<Bits>();
+ 458 : value<Bits> result;
+ 459 : chunk::type carry = 0;
+ 460 : for (size_t n = 0; n < chunks - shift_chunks; n++) {
+ 461 : result.data[chunks - shift_chunks - 1 - n] = carry | (data[chunks - 1 - n] >> shift_bits);
+ 462 : carry = (shift_bits == 0) ? 0
+ 463 : : data[chunks - 1 - n] << (chunk::bits - shift_bits);
+ 464 : }
+ 465 : if (Signed && is_neg()) {
+ 466 : size_t top_chunk_idx = amount.data[0] > Bits ? 0 : (Bits - amount.data[0]) / chunk::bits;
+ 467 : size_t top_chunk_bits = amount.data[0] > Bits ? 0 : (Bits - amount.data[0]) % chunk::bits;
+ 468 : for (size_t n = top_chunk_idx + 1; n < chunks; n++)
+ 469 : result.data[n] = chunk::mask;
+ 470 : if (amount.data[0] != 0)
+ 471 : result.data[top_chunk_idx] |= chunk::mask << top_chunk_bits;
+ 472 : result.data[result.chunks - 1] &= result.msb_mask;
+ 473 : }
+ 474 : return result;
+ 475 : }
+ 476 :
+ 477 : template<size_t AmountBits>
+ 478 : value<Bits> sshr(const value<AmountBits> &amount) const {
+ 479 : return shr<AmountBits, /*Signed=*/true>(amount);
+ 480 : }
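
// For example (editorial sketch), shifting the 8-bit value 0x81 by one place:
//
//   value<8> x{0x81u};
//   x.shl(value<8>{1u})  == value<8>{0x02u}   // the shifted-out bit is dropped
//   x.shr(value<8>{1u})  == value<8>{0x40u}   // logical right shift
//   x.sshr(value<8>{1u}) == value<8>{0xc0u}   // arithmetic right shift replicates the sign bit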
+ 481 :
+ 482 : template<size_t ResultBits, size_t SelBits>
+ 483 : value<ResultBits> bmux(const value<SelBits> &sel) const {
+ 484 : static_assert(ResultBits << SelBits == Bits, "invalid sizes used in bmux()");
+ 485 : size_t amount = sel.data[0] * ResultBits;
+ 486 : size_t shift_chunks = amount / chunk::bits;
+ 487 : size_t shift_bits = amount % chunk::bits;
+ 488 : value<ResultBits> result;
+ 489 : chunk::type carry = 0;
+ 490 : if (ResultBits % chunk::bits + shift_bits > chunk::bits)
+ 491 : carry = data[result.chunks + shift_chunks] << (chunk::bits - shift_bits);
+ 492 : for (size_t n = 0; n < result.chunks; n++) {
+ 493 : result.data[result.chunks - 1 - n] = carry | (data[result.chunks + shift_chunks - 1 - n] >> shift_bits);
+ 494 : carry = (shift_bits == 0) ? 0
+ 495 : : data[result.chunks + shift_chunks - 1 - n] << (chunk::bits - shift_bits);
+ 496 : }
+ 497 : result.data[result.chunks - 1] &= result.msb_mask;
+ 498 : return result;
+ 499 : }
+ 500 :
+ 501 : template<size_t ResultBits, size_t SelBits>
+ 502 : value<ResultBits> demux(const value<SelBits> &sel) const {
+ 503 : static_assert(Bits << SelBits == ResultBits, "invalid sizes used in demux()");
+ 504 : size_t amount = sel.data[0] * Bits;
+ 505 : size_t shift_chunks = amount / chunk::bits;
+ 506 : size_t shift_bits = amount % chunk::bits;
+ 507 : value<ResultBits> result;
+ 508 : chunk::type carry = 0;
+ 509 : for (size_t n = 0; n < chunks; n++) {
+ 510 : result.data[shift_chunks + n] = (data[n] << shift_bits) | carry;
+ 511 : carry = (shift_bits == 0) ? 0
+ 512 : : data[n] >> (chunk::bits - shift_bits);
+ 513 : }
+ 514 : if (Bits % chunk::bits + shift_bits > chunk::bits)
+ 515 : result.data[shift_chunks + chunks] = carry;
+ 516 : return result;
+ 517 : }
+ 518 :
+ 519 : size_t ctpop() const {
+ 520 : size_t count = 0;
+ 521 : for (size_t n = 0; n < chunks; n++) {
+ 522 : // This loop implements the population count idiom as recognized by LLVM and GCC.
+ 523 : for (chunk::type x = data[n]; x != 0; count++)
+ 524 : x = x & (x - 1);
+ 525 : }
+ 526 : return count;
+ 527 : }
+ 528 :
+ 529 : size_t ctlz() const {
+ 530 : size_t count = 0;
+ 531 : for (size_t n = 0; n < chunks; n++) {
+ 532 : chunk::type x = data[chunks - 1 - n];
+ 533 : // First add to `count` as if the chunk is zero
+ 534 : constexpr size_t msb_chunk_bits = Bits % chunk::bits != 0 ? Bits % chunk::bits : chunk::bits;
+ 535 : count += (n == 0 ? msb_chunk_bits : chunk::bits);
+ 536 : // If the chunk isn't zero, correct the `count` value and return
+ 537 : if (x != 0) {
+ 538 : for (; x != 0; count--)
+ 539 : x >>= 1;
+ 540 : break;
+ 541 : }
+ 542 : }
+ 543 : return count;
+ 544 : }
+ 545 :
+ 546 : template<bool Invert, bool CarryIn>
+ 547 33 : std::pair<value<Bits>, bool /*CarryOut*/> alu(const value<Bits> &other) const {
+ 548 33 : value<Bits> result;
+ 549 33 : bool carry = CarryIn;
+ 550 66 : for (size_t n = 0; n < result.chunks; n++) {
+ 551 33 : result.data[n] = data[n] + (Invert ? ~other.data[n] : other.data[n]) + carry;
+ 552 : if (result.chunks - 1 == n)
+ 553 33 : result.data[result.chunks - 1] &= result.msb_mask;
+ 554 33 : carry = (result.data[n] < data[n]) ||
+ 555 : (result.data[n] == data[n] && carry);
+ 556 : }
+ 557 33 : return {result, carry};
+ 558 : }
+ 559 :
+ 560 33 : value<Bits> add(const value<Bits> &other) const {
+ 561 33 : return alu</*Invert=*/false, /*CarryIn=*/false>(other).first;
+ 562 : }
+ 563 :
+ 564 : value<Bits> sub(const value<Bits> &other) const {
+ 565 : return alu</*Invert=*/true, /*CarryIn=*/true>(other).first;
+ 566 : }
+ 567 :
+ 568 : value<Bits> neg() const {
+ 569 : return value<Bits>().sub(*this);
+ 570 : }
+ 571 :
+ 572 : bool ucmp(const value<Bits> &other) const {
+ 573 : bool carry;
+ 574 : std::tie(std::ignore, carry) = alu</*Invert=*/true, /*CarryIn=*/true>(other);
+ 575 : return !carry; // a.ucmp(b) ≡ a u< b
+ 576 : }
+ 577 :
+ 578 : bool scmp(const value<Bits> &other) const {
+ 579 : value<Bits> result;
+ 580 : bool carry;
+ 581 : std::tie(result, carry) = alu</*Invert=*/true, /*CarryIn=*/true>(other);
+ 582 : bool overflow = (is_neg() == !other.is_neg()) && (is_neg() != result.is_neg());
+ 583 : return result.is_neg() ^ overflow; // a.scmp(b) ≡ a s< b
+ 584 : }
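
// For example (editorial sketch), the same bit pattern compares differently depending on signedness:
//
//   value<8> a{0xf0u}, b{0x10u};
//   a.ucmp(b) == false   // 240 u< 16
//   a.scmp(b) == true    // -16 s< 16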
+ 585 :
+ 586 : template<size_t ResultBits>
+ 587 : value<ResultBits> mul(const value<Bits> &other) const {
+ 588 : value<ResultBits> result;
+ 589 : wide_chunk_t wide_result[result.chunks + 1] = {};
+ 590 : for (size_t n = 0; n < chunks; n++) {
+ 591 : for (size_t m = 0; m < chunks && n + m < result.chunks; m++) {
+ 592 : wide_result[n + m] += wide_chunk_t(data[n]) * wide_chunk_t(other.data[m]);
+ 593 : wide_result[n + m + 1] += wide_result[n + m] >> chunk::bits;
+ 594 : wide_result[n + m] &= chunk::mask;
+ 595 : }
+ 596 : }
+ 597 : for (size_t n = 0; n < result.chunks; n++) {
+ 598 : result.data[n] = wide_result[n];
+ 599 : }
+ 600 : result.data[result.chunks - 1] &= result.msb_mask;
+ 601 : return result;
+ 602 : }
+ 603 :
+ 604 : std::pair<value<Bits>, value<Bits>> udivmod(value<Bits> divisor) const {
+ 605 : value<Bits> quotient;
+ 606 : value<Bits> dividend = *this;
+ 607 : if (dividend.ucmp(divisor))
+ 608 : return {/*quotient=*/value<Bits>{0u}, /*remainder=*/dividend};
+ 609 : int64_t divisor_shift = divisor.ctlz() - dividend.ctlz();
+ 610 : assert(divisor_shift >= 0);
+ 611 : divisor = divisor.shl(value<Bits>{(chunk::type) divisor_shift});
+ 612 : for (size_t step = 0; step <= divisor_shift; step++) {
+ 613 : quotient = quotient.shl(value<Bits>{1u});
+ 614 : if (!dividend.ucmp(divisor)) {
+ 615 : dividend = dividend.sub(divisor);
+ 616 : quotient.set_bit(0, true);
+ 617 : }
+ 618 : divisor = divisor.shr(value<Bits>{1u});
+ 619 : }
+ 620 : return {quotient, /*remainder=*/dividend};
+ 621 : }
+ 622 :
+ 623 : std::pair<value<Bits>, value<Bits>> sdivmod(const value<Bits> &other) const {
+ 624 : value<Bits + 1> quotient;
+ 625 : value<Bits + 1> remainder;
+ 626 : value<Bits + 1> dividend = sext<Bits + 1>();
+ 627 : value<Bits + 1> divisor = other.template sext<Bits + 1>();
+ 628 : if (is_neg()) dividend = dividend.neg();
+ 629 : if (other.is_neg()) divisor = divisor.neg();
+ 630 : std::tie(quotient, remainder) = dividend.udivmod(divisor);
+ 631 : if (is_neg() != other.is_neg()) quotient = quotient.neg();
+ 632 : if (is_neg()) remainder = remainder.neg();
+ 633 : return {quotient.template trunc<Bits>(), remainder.template trunc<Bits>()};
+ 634 : }
+ 635 : };
+ 636 :
+ 637 : // Expression template for a slice, usable as lvalue or rvalue, and composable with other expression templates here.
+ 638 : template<class T, size_t Stop, size_t Start>
+ 639 : struct slice_expr : public expr_base<slice_expr<T, Stop, Start>> {
+ 640 : static_assert(Stop >= Start, "slice_expr() may not reverse bit order");
+ 641 : static_assert(Start < T::bits && Stop < T::bits, "slice_expr() must be within bounds");
+ 642 : static constexpr size_t bits = Stop - Start + 1;
+ 643 :
+ 644 : T &expr;
+ 645 :
+ 646 : slice_expr(T &expr) : expr(expr) {}
+ 647 : slice_expr(const slice_expr<T, Stop, Start> &) = delete;
+ 648 :
+ 649 : CXXRTL_ALWAYS_INLINE
+ 650 : operator value<bits>() const {
+ 651 : return static_cast<const value<T::bits> &>(expr)
+ 652 : .template rtrunc<T::bits - Start>()
+ 653 : .template trunc<bits>();
+ 654 : }
+ 655 :
+ 656 : CXXRTL_ALWAYS_INLINE
+ 657 : slice_expr<T, Stop, Start> &operator=(const value<bits> &rhs) {
+ 658 : // Generic partial assignment implemented using a read-modify-write operation on the sliced expression.
+ 659 : expr = static_cast<const value<T::bits> &>(expr)
+ 660 : .template blit<Stop, Start>(rhs);
+ 661 : return *this;
+ 662 : }
+ 663 :
+ 664 : // A helper that forces the cast to value<>, which allows deduction to work.
+ 665 : CXXRTL_ALWAYS_INLINE
+ 666 : value<bits> val() const {
+ 667 : return static_cast<const value<bits> &>(*this);
+ 668 : }
+ 669 : };
+ 670 :
+ 671 : // Expression template for a concatenation, usable as lvalue or rvalue, and composable with other expression templates here.
+ 672 : template<class T, class U>
+ 673 : struct concat_expr : public expr_base<concat_expr<T, U>> {
+ 674 : static constexpr size_t bits = T::bits + U::bits;
+ 675 :
+ 676 : T &ms_expr;
+ 677 : U &ls_expr;
+ 678 :
+ 679 : concat_expr(T &ms_expr, U &ls_expr) : ms_expr(ms_expr), ls_expr(ls_expr) {}
+ 680 : concat_expr(const concat_expr<T, U> &) = delete;
+ 681 :
+ 682 : CXXRTL_ALWAYS_INLINE
+ 683 : operator value<bits>() const {
+ 684 : value<bits> ms_shifted = static_cast<const value<T::bits> &>(ms_expr)
+ 685 : .template rzext<bits>();
+ 686 : value<bits> ls_extended = static_cast<const value<U::bits> &>(ls_expr)
+ 687 : .template zext<bits>();
+ 688 : return ms_shifted.bit_or(ls_extended);
+ 689 : }
+ 690 :
+ 691 : CXXRTL_ALWAYS_INLINE
+ 692 : concat_expr<T, U> &operator=(const value<bits> &rhs) {
+ 693 : ms_expr = rhs.template rtrunc<T::bits>();
+ 694 : ls_expr = rhs.template trunc<U::bits>();
+ 695 : return *this;
+ 696 : }
+ 697 :
+ 698 : // A helper that forces the cast to value<>, which allows deduction to work.
+ 699 : CXXRTL_ALWAYS_INLINE
+ 700 : value<bits> val() const {
+ 701 : return static_cast<const value<bits> &>(*this);
+ 702 : }
+ 703 : };
+ 704 :
+ 705 : // Base class for expression templates, providing helper methods for operations that are valid on both rvalues and lvalues.
+ 706 : //
+ 707 : // Note that expression objects (slices and concatenations) constructed in this way should NEVER be captured because
+ 708 : // they refer to temporaries that will, in general, only live until the end of the statement. For example, both of
+ 709 : // these snippets perform use-after-free:
+ 710 : //
+ 711 : // const auto &a = val.slice<7,0>().slice<1>();
+ 712 : // value<1> b = a;
+ 713 : //
+ 714 : // auto &&c = val.slice<7,0>().slice<1>();
+ 715 : // c = value<1>{1u};
+ 716 : //
+ 717 : // An easy way to write code using slices and concatenations safely is to follow two simple rules:
+ 718 : // * Never explicitly name any type except `value<W>` or `const value<W> &`.
+ 719 : // * Never use a `const auto &` or `auto &&` in any such expression.
+ 720 : // Then, any code that compiles will be well-defined.
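//
// For example (editorial sketch), the two snippets above become well-defined when the expression objects are
// not given names and everything happens within a single statement:
//
//   value<1> b = val.slice<7,0>().slice<1>();
//   val.slice<7,0>().slice<1>() = value<1>{1u};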
+ 721 : template<class T>
+ 722 : struct expr_base {
+ 723 : template<size_t Stop, size_t Start = Stop>
+ 724 : CXXRTL_ALWAYS_INLINE
+ 725 : slice_expr<const T, Stop, Start> slice() const {
+ 726 : return {*static_cast<const T *>(this)};
+ 727 : }
+ 728 :
+ 729 : template<size_t Stop, size_t Start = Stop>
+ 730 : CXXRTL_ALWAYS_INLINE
+ 731 : slice_expr<T, Stop, Start> slice() {
+ 732 : return {*static_cast<T *>(this)};
+ 733 : }
+ 734 :
+ 735 : template<class U>
+ 736 : CXXRTL_ALWAYS_INLINE
+ 737 : concat_expr<const T, typename std::remove_reference<const U>::type> concat(const U &other) const {
+ 738 : return {*static_cast<const T *>(this), other};
+ 739 : }
+ 740 :
+ 741 : template<class U>
+ 742 : CXXRTL_ALWAYS_INLINE
+ 743 : concat_expr<T, typename std::remove_reference<U>::type> concat(U &&other) {
+ 744 : return {*static_cast<T *>(this), other};
+ 745 : }
+ 746 : };
+ 747 :
+ 748 : template<size_t Bits>
+ 749 : std::ostream &operator<<(std::ostream &os, const value<Bits> &val) {
+ 750 : auto old_flags = os.flags(std::ios::right);
+ 751 : auto old_width = os.width(0);
+ 752 : auto old_fill = os.fill('0');
+ 753 : os << val.bits << '\'' << std::hex;
+ 754 : for (size_t n = val.chunks - 1; n != (size_t)-1; n--) {
+ 755 : if (n == val.chunks - 1 && Bits % value<Bits>::chunk::bits != 0)
+ 756 : os.width((Bits % value<Bits>::chunk::bits + 3) / 4);
+ 757 : else
+ 758 : os.width((value<Bits>::chunk::bits + 3) / 4);
+ 759 : os << val.data[n];
+ 760 : }
+ 761 : os.fill(old_fill);
+ 762 : os.width(old_width);
+ 763 : os.flags(old_flags);
+ 764 : return os;
+ 765 : }
+ 766 :
+ 767 : template<size_t Bits>
+ 768 : struct wire {
+ 769 : static constexpr size_t bits = Bits;
+ 770 :
+ 771 : value<Bits> curr;
+ 772 : value<Bits> next;
+ 773 :
+ 774 : wire() = default;
+ 775 : explicit constexpr wire(const value<Bits> &init) : curr(init), next(init) {}
+ 776 : template<typename... Init>
+ 777 : explicit constexpr wire(Init ...init) : curr{init...}, next{init...} {}
+ 778 :
+ 779 : // Copying and copy-assigning values is natural. If, however, a value is replaced with a wire,
+ 780 : // e.g. because a module is built with a different optimization level, then existing code could
+ 781 : // unintentionally copy a wire instead, which would create a subtle but serious bug. To make sure
+ 782 : // this doesn't happen, prohibit copying and copy-assigning wires.
+ 783 : wire(const wire<Bits> &) = delete;
+ 784 : wire<Bits> &operator=(const wire<Bits> &) = delete;
+ 785 :
+ 786 : wire(wire<Bits> &&) = default;
+ 787 : wire<Bits> &operator=(wire<Bits> &&) = default;
+ 788 :
+ 789 : template<class IntegerT>
+ 790 : CXXRTL_ALWAYS_INLINE
+ 791 : IntegerT get() const {
+ 792 : return curr.template get<IntegerT>();
+ 793 : }
+ 794 :
+ 795 : template<class IntegerT>
+ 796 : CXXRTL_ALWAYS_INLINE
+ 797 : void set(IntegerT other) {
+ 798 : next.template set<IntegerT>(other);
+ 799 : }
+ 800 :
+ 801 : // This method intentionally takes a mandatory argument (to make it more difficult to misuse in
+ 802 : // black box implementations, leading to missed observer events). It is generic over its argument
+ 803 : // to allow the `on_update` method to be non-virtual.
+ 804 : template<class ObserverT>
+ 805 : bool commit(ObserverT &observer) {
+ 806 : if (curr != next) {
+ 807 : observer.on_update(curr.chunks, curr.data, next.data);
+ 808 : curr = next;
+ 809 : return true;
+ 810 : }
+ 811 : return false;
+ 812 : }
+ 813 : };
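
// For example (editorial sketch; `observer` is the no-op observer defined further below), a wire is written
// through its `next` value and the change becomes visible in `curr` only after `commit()`:
//
//   wire<8> w;
//   w.set<uint8_t>(0x2a);
//   observer obs;
//   bool changed = w.commit(obs); // true: `curr` now holds 0x2a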
+ 814 :
+ 815 : template<size_t Bits>
+ 816 : std::ostream &operator<<(std::ostream &os, const wire<Bits> &val) {
+ 817 : os << val.curr;
+ 818 : return os;
+ 819 : }
+ 820 :
+ 821 : template<size_t Width>
+ 822 : struct memory {
+ 823 : const size_t depth;
+ 824 : std::unique_ptr<value<Width>[]> data;
+ 825 :
+ 826 : explicit memory(size_t depth) : depth(depth), data(new value<Width>[depth]) {}
+ 827 :
+ 828 : memory(const memory<Width> &) = delete;
+ 829 : memory<Width> &operator=(const memory<Width> &) = delete;
+ 830 :
+ 831 : memory(memory<Width> &&) = default;
+ 832 : memory<Width> &operator=(memory<Width> &&other) {
+ 833 : assert(depth == other.depth);
+ 834 : data = std::move(other.data);
+ 835 : write_queue = std::move(other.write_queue);
+ 836 : return *this;
+ 837 : }
+ 838 :
+ 839 : // An operator for direct memory reads. May be used at any time during the simulation.
+ 840 : const value<Width> &operator [](size_t index) const {
+ 841 : assert(index < depth);
+ 842 : return data[index];
+ 843 : }
+ 844 :
+ 845 : // An operator for direct memory writes. May only be used before the simulation is started. If used
+ 846 : // after the simulation is started, the design may malfunction.
+ 847 : value<Width> &operator [](size_t index) {
+ 848 : assert(index < depth);
+ 849 : return data[index];
+ 850 : }
+ 851 :
+ 852 : // A simple way to make a writable memory would be to use an array of wires instead of an array of values.
+ 853 : // However, there are two significant downsides to this approach: first, it has large overhead (2× space
+ 854 : // overhead, and O(depth) time overhead during commit); second, it does not simplify handling write port
+ 855 : // priorities. Although in principle write ports could be ordered or conditionally enabled in generated
+ 856 : // code based on their priorities and selected addresses, the feedback arc set problem is computationally
+ 857 : // expensive, and the heuristic-based algorithms are not easily modified to guarantee (rather than prefer)
+ 858 : // a particular write port evaluation order.
+ 859 : //
+ 860 : // The approach used here instead is to queue writes into a buffer during the eval phase, then perform
+ 861 : // the writes during the commit phase in the priority order. This approach has low overhead, with both space
+ 862 : // and time proportional to the number of write ports. Because virtually every memory in a practical design
+ 863 : // has at most two write ports, linear search is used on every write, being the fastest and simplest approach.
+ 864 : struct write {
+ 865 : size_t index;
+ 866 : value<Width> val;
+ 867 : value<Width> mask;
+ 868 : int priority;
+ 869 : };
+ 870 : std::vector<write> write_queue;
+ 871 :
+ 872 : void update(size_t index, const value<Width> &val, const value<Width> &mask, int priority = 0) {
+ 873 : assert(index < depth);
+ 874 : // Queue up the write while keeping the queue sorted by priority.
+ 875 : write_queue.insert(
+ 876 : std::upper_bound(write_queue.begin(), write_queue.end(), priority,
+ 877 : [](const int a, const write& b) { return a < b.priority; }),
+ 878 : write { index, val, mask, priority });
+ 879 : }
+ 880 :
+ 881 : // See the note for `wire::commit()`.
+ 882 : template<class ObserverT>
+ 883 : bool commit(ObserverT &observer) {
+ 884 : bool changed = false;
+ 885 : for (const write &entry : write_queue) {
+ 886 : value<Width> elem = data[entry.index];
+ 887 : elem = elem.update(entry.val, entry.mask);
+ 888 : if (data[entry.index] != elem) {
+ 889 : observer.on_update(value<Width>::chunks, data[0].data, elem.data, entry.index);
+ 890 : changed |= true;
+ 891 : }
+ 892 : data[entry.index] = elem;
+ 893 : }
+ 894 : write_queue.clear();
+ 895 : return changed;
+ 896 : }
+ 897 : };
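
// For example (editorial sketch), two writes queued to the same element during one eval phase resolve in
// priority order at commit time (`obs` is any observer, e.g. the no-op one defined further below):
//
//   memory<8> mem(16);
//   mem.update(/*index=*/3, value<8>{0xaau}, /*mask=*/value<8>{0xffu}, /*priority=*/0);
//   mem.update(/*index=*/3, value<8>{0x55u}, /*mask=*/value<8>{0xffu}, /*priority=*/1);
//   mem.commit(obs); // mem[3] == value<8>{0x55u}; the higher-priority write is applied last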
+ 898 :
+ 899 27 : struct metadata {
+ 900 : const enum {
+ 901 : MISSING = 0,
+ 902 : UINT = 1,
+ 903 : SINT = 2,
+ 904 : STRING = 3,
+ 905 : DOUBLE = 4,
+ 906 : } value_type;
+ 907 :
+ 908 : // In debug mode, using the wrong .as_*() function will assert.
+ 909 : // In release mode, using the wrong .as_*() function will safely return a default value.
+ 910 : const uint64_t uint_value = 0;
+ 911 : const int64_t sint_value = 0;
+ 912 : const std::string string_value = "";
+ 913 : const double double_value = 0.0;
+ 914 :
+ 915 : metadata() : value_type(MISSING) {}
+ 916 0 : metadata(uint64_t value) : value_type(UINT), uint_value(value) {}
+ 917 0 : metadata(int64_t value) : value_type(SINT), sint_value(value) {}
+ 918 9 : metadata(const std::string &value) : value_type(STRING), string_value(value) {}
+ 919 0 : metadata(const char *value) : value_type(STRING), string_value(value) {}
+ 920 0 : metadata(double value) : value_type(DOUBLE), double_value(value) {}
+ 921 :
+ 922 18 : metadata(const metadata &) = default;
+ 923 : metadata &operator=(const metadata &) = delete;
+ 924 :
+ 925 : uint64_t as_uint() const {
+ 926 : assert(value_type == UINT);
+ 927 : return uint_value;
+ 928 : }
+ 929 :
+ 930 : int64_t as_sint() const {
+ 931 : assert(value_type == SINT);
+ 932 : return sint_value;
+ 933 : }
+ 934 :
+ 935 : const std::string &as_string() const {
+ 936 : assert(value_type == STRING);
+ 937 : return string_value;
+ 938 : }
+ 939 :
+ 940 : double as_double() const {
+ 941 : assert(value_type == DOUBLE);
+ 942 : return double_value;
+ 943 : }
+ 944 :
+ 945 : // Internal CXXRTL use only.
+ 946 9 : static std::map<std::string, metadata> deserialize(const char *ptr) {
+ 947 9 : std::map<std::string, metadata> result;
+ 948 9 : std::string name;
+ 949 : // Grammar:
+ 950 : // string ::= [^\0]+ \0
+ 951 : // metadata ::= [uid] .{8} | s <string>
+ 952 : // map ::= ( <string> <metadata> )* \0
+ 953 45 : for (;;) {
+ 954 45 : if (*ptr) {
+ 955 45 : name += *ptr++;
+ 956 18 : } else if (!name.empty()) {
+ 957 9 : ptr++;
+ 958 9 : auto get_u64 = [&]() {
+ 959 0 : uint64_t result = 0;
+ 960 0 : for (size_t count = 0; count < 8; count++)
+ 961 0 : result = (result << 8) | *ptr++;
+ 962 0 : return result;
+ 963 9 : };
+ 964 9 : char type = *ptr++;
+ 965 9 : if (type == 'u') {
+ 966 0 : uint64_t value = get_u64();
+ 967 0 : result.emplace(name, value);
+ 968 9 : } else if (type == 'i') {
+ 969 0 : int64_t value = (int64_t)get_u64();
+ 970 0 : result.emplace(name, value);
+ 971 9 : } else if (type == 'd') {
+ 972 0 : double dvalue;
+ 973 0 : uint64_t uvalue = get_u64();
+ 974 0 : static_assert(sizeof(dvalue) == sizeof(uvalue), "double must be 64 bits in size");
+ 975 0 : memcpy(&dvalue, &uvalue, sizeof(dvalue));
+ 976 0 : result.emplace(name, dvalue);
+ 977 9 : } else if (type == 's') {
+ 978 9 : std::string value;
+ 979 9 : while (*ptr)
+ 980 216 : value += *ptr++;
+ 981 9 : ptr++;
+ 982 9 : result.emplace(name, value);
+ 983 9 : } else {
+ 984 0 : assert(false && "Unknown type specifier");
+ 985 9 : return result;
+ 986 : }
+ 987 9 : name.clear();
+ 988 : } else {
+ 989 9 : return result;
+ 990 : }
+ 991 : }
+ 992 9 : }
+ 993 : };
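
// For example (editorial sketch of the grammar documented inside `deserialize()` above), the byte string below
// encodes a single string-valued attribute, and the final NUL terminates the map:
//
//   auto attrs = metadata::deserialize("src\0" "s" "top.v:10\0");
//   attrs.at("src").as_string() == "top.v:10"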
+ 994 :
+ 995 : typedef std::map<std::string, metadata> metadata_map;
+ 996 :
+ 997 : struct performer;
+ 998 :
+ 999 : // An object that allows formatting a string lazily.
+ 1000 : struct lazy_fmt {
+ 1001 : virtual std::string operator() () const = 0;
+ 1002 : };
+ 1003 :
+ 1004 : // Flavor of a `$check` cell.
+ 1005 : enum class flavor {
+ 1006 : // Corresponds to a `$assert` cell in other flows, and a Verilog `assert ()` statement.
+ 1007 : ASSERT,
+ 1008 : // Corresponds to a `$assume` cell in other flows, and a Verilog `assume ()` statement.
+ 1009 : ASSUME,
+ 1010 : // Corresponds to a `$live` cell in other flows, and a Verilog `assert (eventually)` statement.
+ 1011 : ASSERT_EVENTUALLY,
+ 1012 : // Corresponds to a `$fair` cell in other flows, and a Verilog `assume (eventually)` statement.
+ 1013 : ASSUME_EVENTUALLY,
+ 1014 : // Corresponds to a `$cover` cell in other flows, and a Verilog `cover ()` statement.
+ 1015 : COVER,
+ 1016 : };
+ 1017 :
+ 1018 : // An object that can be passed to an `eval()` method in order to act on side effects. The default behavior implemented
+ 1019 : // below is the same as the behavior of `eval(nullptr)`, except that the `-print-output` option of `write_cxxrtl` is not
+ 1020 : // taken into account.
+ 1021 : struct performer {
+ 1022 : // Called by generated formatting code to evaluate a Verilog `$time` expression.
+ 1023 : virtual int64_t vlog_time() const { return 0; }
+ 1024 :
+ 1025 : // Called by generated formatting code to evaluate a Verilog `$realtime` expression.
+ 1026 : virtual double vlog_realtime() const { return vlog_time(); }
+ 1027 :
+ 1028 : // Called when a `$print` cell is triggered.
+ 1029 : virtual void on_print(const lazy_fmt &formatter, const metadata_map &attributes) {
+ 1030 : std::cout << formatter();
+ 1031 : }
+ 1032 :
+ 1033 : // Called when a `$check` cell is triggered.
+ 1034 : virtual void on_check(flavor type, bool condition, const lazy_fmt &formatter, const metadata_map &attributes) {
+ 1035 : if (type == flavor::ASSERT || type == flavor::ASSUME) {
+ 1036 : if (!condition)
+ 1037 : std::cerr << formatter();
+ 1038 : CXXRTL_ASSERT(condition && "Check failed");
+ 1039 : }
+ 1040 : }
+ 1041 : };
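
// For example (editorial sketch; `log_stream` is a hypothetical std::ostream), a simulation driver can subclass
// `performer` to redirect `$print` output and provide a time base:
//
//   struct my_performer : public performer {
//       int64_t steps = 0;
//       int64_t vlog_time() const override { return steps; }
//       void on_print(const lazy_fmt &formatter, const metadata_map &attributes) override {
//           log_stream << formatter();
//       }
//   };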
+ 1042 :
+ 1043 : // An object that can be passed to a `commit()` method in order to produce a replay log of every state change in
+ 1044 : // the simulation. Unlike `performer`, `observer` does not use virtual calls as their overhead is unacceptable, and
+ 1045 : // a comparatively heavyweight template-based solution is justified.
+ 1046 : struct observer {
+ 1047 : // Called when the `commit()` method for a wire is about to update the `chunks` chunks at `base` with `chunks` chunks
+ 1048 : // at `value` that have a different bit pattern. It is guaranteed that `chunks` is equal to the wire chunk count and
+ 1049 : // `base` points to the first chunk.
+ 1050 : void on_update(size_t chunks, const chunk_t *base, const chunk_t *value) {}
+ 1051 :
+ 1052 : // Called when the `commit()` method for a memory is about to update the `chunks` chunks at `&base[chunks * index]`
+ 1053 : // with `chunks` chunks at `value` that have a different bit pattern. It is guaranteed that `chunks` is equal to
+ 1054 : // the memory element chunk count and `base` points to the first chunk of the first element of the memory.
+ 1055 : void on_update(size_t chunks, const chunk_t *base, const chunk_t *value, size_t index) {}
+ 1056 : };
+ 1057 :
+ 1058 : // Must be kept in sync with `struct FmtPart` in kernel/fmt.h!
+ 1059 : // Default member initializers would make this a non-aggregate type in C++11, so they are commented out.
+ 1060 : struct fmt_part {
+ 1061 : enum {
+ 1062 : LITERAL = 0,
+ 1063 : INTEGER = 1,
+ 1064 : STRING = 2,
+ 1065 : UNICHAR = 3,
+ 1066 : VLOG_TIME = 4,
+ 1067 : } type;
+ 1068 :
+ 1069 : // LITERAL type
+ 1070 : std::string str;
+ 1071 :
+ 1072 : // INTEGER/STRING/UNICHAR types
+ 1073 : // + value<Bits> val;
+ 1074 :
+ 1075 : // INTEGER/STRING/VLOG_TIME types
+ 1076 : enum {
+ 1077 : RIGHT = 0,
+ 1078 : LEFT = 1,
+ 1079 : NUMERIC = 2,
+ 1080 : } justify; // = RIGHT;
+ 1081 : char padding; // = '\0';
+ 1082 : size_t width; // = 0;
+ 1083 :
+ 1084 : // INTEGER type
+ 1085 : unsigned base; // = 10;
+ 1086 : bool signed_; // = false;
+ 1087 : enum {
+ 1088 : MINUS = 0,
+ 1089 : PLUS_MINUS = 1,
+ 1090 : SPACE_MINUS = 2,
+ 1091 : } sign; // = MINUS;
+ 1092 : bool hex_upper; // = false;
+ 1093 : bool show_base; // = false;
+ 1094 : bool group; // = false;
+ 1095 :
+ 1096 : // VLOG_TIME type
+ 1097 : bool realtime; // = false;
+ 1098 : // + int64_t itime;
+ 1099 : // + double ftime;
+ 1100 :
+ 1101 : // Format the part as a string.
+ 1102 : //
+ 1103 : // If a `performer` is provided, it is used to evaluate Verilog `$time` and `$realtime` expressions; otherwise they render as zero.
+ 1104 : template<size_t Bits>
+ 1105 : std::string render(value<Bits> val, performer *performer = nullptr)
+ 1106 : {
+ 1107 : // We might want to replace some of these bit() calls with direct
+ 1108 : // chunk access if it turns out to be slow enough to matter.
+ 1109 : std::string buf;
+ 1110 : std::string prefix;
+ 1111 : switch (type) {
+ 1112 : case LITERAL:
+ 1113 : return str;
+ 1114 :
+ 1115 : case STRING: {
+ 1116 : buf.reserve(Bits/8);
+ 1117 : for (int i = 0; i < Bits; i += 8) {
+ 1118 : char ch = 0;
+ 1119 : for (int j = 0; j < 8 && i + j < int(Bits); j++)
+ 1120 : if (val.bit(i + j))
+ 1121 : ch |= 1 << j;
+ 1122 : if (ch != 0)
+ 1123 : buf.append({ch});
+ 1124 : }
+ 1125 : std::reverse(buf.begin(), buf.end());
+ 1126 : break;
+ 1127 : }
+ 1128 :
+ 1129 : case UNICHAR: {
+ 1130 : uint32_t codepoint = val.template get<uint32_t>();
+ 1131 : if (codepoint >= 0x10000)
+ 1132 : buf += (char)(0xf0 | (codepoint >> 18));
+ 1133 : else if (codepoint >= 0x800)
+ 1134 : buf += (char)(0xe0 | (codepoint >> 12));
+ 1135 : else if (codepoint >= 0x80)
+ 1136 : buf += (char)(0xc0 | (codepoint >> 6));
+ 1137 : else
+ 1138 : buf += (char)codepoint;
+ 1139 : if (codepoint >= 0x10000)
+ 1140 : buf += (char)(0x80 | ((codepoint >> 12) & 0x3f));
+ 1141 : if (codepoint >= 0x800)
+ 1142 : buf += (char)(0x80 | ((codepoint >> 6) & 0x3f));
+ 1143 : if (codepoint >= 0x80)
+ 1144 : buf += (char)(0x80 | ((codepoint >> 0) & 0x3f));
+ 1145 : break;
+ 1146 : }
+ 1147 :
+ 1148 : case INTEGER: {
+ 1149 : bool negative = signed_ && val.is_neg();
+ 1150 : if (negative) {
+ 1151 : prefix = "-";
+ 1152 : val = val.neg();
+ 1153 : } else {
+ 1154 : switch (sign) {
+ 1155 : case MINUS: break;
+ 1156 : case PLUS_MINUS: prefix = "+"; break;
+ 1157 : case SPACE_MINUS: prefix = " "; break;
+ 1158 : }
+ 1159 : }
+ 1160 :
+ 1161 : size_t val_width = Bits;
+ 1162 : if (base != 10) {
+ 1163 : val_width = 1;
+ 1164 : for (size_t index = 0; index < Bits; index++)
+ 1165 : if (val.bit(index))
+ 1166 : val_width = index + 1;
+ 1167 : }
+ 1168 :
+ 1169 : if (base == 2) {
+ 1170 : if (show_base)
+ 1171 : prefix += "0b";
+ 1172 : for (size_t index = 0; index < val_width; index++) {
+ 1173 : if (group && index > 0 && index % 4 == 0)
+ 1174 : buf += '_';
+ 1175 : buf += (val.bit(index) ? '1' : '0');
+ 1176 : }
+ 1177 : } else if (base == 8 || base == 16) {
+ 1178 : if (show_base)
+ 1179 : prefix += (base == 16) ? (hex_upper ? "0X" : "0x") : "0o";
+ 1180 : size_t step = (base == 16) ? 4 : 3;
+ 1181 : for (size_t index = 0; index < val_width; index += step) {
+ 1182 : if (group && index > 0 && index % (4 * step) == 0)
+ 1183 : buf += '_';
+ 1184 : uint8_t value = val.bit(index) | (val.bit(index + 1) << 1) | (val.bit(index + 2) << 2);
+ 1185 : if (step == 4)
+ 1186 : value |= val.bit(index + 3) << 3;
+ 1187 : buf += (hex_upper ? "0123456789ABCDEF" : "0123456789abcdef")[value];
+ 1188 : }
+ 1189 : } else if (base == 10) {
+ 1190 : if (show_base)
+ 1191 : prefix += "0d";
+ 1192 : if (val.is_zero())
+ 1193 : buf += '0';
+ 1194 : value<(Bits > 4 ? Bits : 4)> xval = val.template zext<(Bits > 4 ? Bits : 4)>();
+ 1195 : size_t index = 0;
+ 1196 : while (!xval.is_zero()) {
+ 1197 : if (group && index > 0 && index % 3 == 0)
+ 1198 : buf += '_';
+ 1199 : value<(Bits > 4 ? Bits : 4)> quotient, remainder;
+ 1200 : if (Bits >= 4)
+ 1201 : std::tie(quotient, remainder) = xval.udivmod(value<(Bits > 4 ? Bits : 4)>{10u});
+ 1202 : else
+ 1203 : std::tie(quotient, remainder) = std::make_pair(value<(Bits > 4 ? Bits : 4)>{0u}, xval);
+ 1204 : buf += '0' + remainder.template trunc<4>().template get<uint8_t>();
+ 1205 : xval = quotient;
+ 1206 : index++;
+ 1207 : }
+ 1208 : } else assert(false && "Unsupported base for fmt_part");
+ 1209 : if (justify == NUMERIC && group && padding == '0') {
+ 1210 : int group_size = base == 10 ? 3 : 4;
+ 1211 : while (prefix.size() + buf.size() < width) {
+ 1212 : if (buf.size() % (group_size + 1) == group_size)
+ 1213 : buf += '_';
+ 1214 : buf += '0';
+ 1215 : }
+ 1216 : }
+ 1217 : std::reverse(buf.begin(), buf.end());
+ 1218 : break;
+ 1219 : }
+ 1220 :
+ 1221 : case VLOG_TIME: {
+ 1222 : if (performer) {
+ 1223 : buf = realtime ? std::to_string(performer->vlog_realtime()) : std::to_string(performer->vlog_time());
+ 1224 : } else {
+ 1225 : buf = realtime ? std::to_string(0.0) : std::to_string(0);
+ 1226 : }
+ 1227 : break;
+ 1228 : }
+ 1229 : }
+ 1230 :
+ 1231 : std::string str;
+ 1232 : assert(width == 0 || padding != '\0');
+ 1233 : if (prefix.size() + buf.size() < width) {
+ 1234 : size_t pad_width = width - prefix.size() - buf.size();
+ 1235 : switch (justify) {
+ 1236 : case LEFT:
+ 1237 : str += prefix;
+ 1238 : str += buf;
+ 1239 : str += std::string(pad_width, padding);
+ 1240 : break;
+ 1241 : case RIGHT:
+ 1242 : str += std::string(pad_width, padding);
+ 1243 : str += prefix;
+ 1244 : str += buf;
+ 1245 : break;
+ 1246 : case NUMERIC:
+ 1247 : str += prefix;
+ 1248 : str += std::string(pad_width, padding);
+ 1249 : str += buf;
+ 1250 : break;
+ 1251 : }
+ 1252 : } else {
+ 1253 : str += prefix;
+ 1254 : str += buf;
+ 1255 : }
+ 1256 : return str;
+ 1257 : }
+ 1258 : };
+ 1259 :
+ 1260 : // Tag class to disambiguate values/wires and their aliases.
+ 1261 : struct debug_alias {};
+ 1262 :
+ 1263 : // Tag declaration to disambiguate values and debug outlines.
+ 1264 : using debug_outline = ::_cxxrtl_outline;
+ 1265 :
+ 1266 : // This structure is intended for consumption via foreign function interfaces, like Python's ctypes.
+ 1267 : // Because of this it uses a C-style layout that is easy to parse rather than more idiomatic C++.
+ 1268 : //
+ 1269 : // To avoid violating strict aliasing rules, this structure has to be a subclass of the one used
+ 1270 : // in the C API, or it would not be possible to cast between the pointers to these.
+ 1271 : //
+ 1272 : // The `attrs` member cannot be owned by this structure because a `cxxrtl_object` can be created
+ 1273 : // from external C code.
+ 1274 : struct debug_item : ::cxxrtl_object {
+ 1275 : // Object types.
+ 1276 : enum : uint32_t {
+ 1277 : VALUE = CXXRTL_VALUE,
+ 1278 : WIRE = CXXRTL_WIRE,
+ 1279 : MEMORY = CXXRTL_MEMORY,
+ 1280 : ALIAS = CXXRTL_ALIAS,
+ 1281 : OUTLINE = CXXRTL_OUTLINE,
+ 1282 : };
+ 1283 :
+ 1284 : // Object flags.
+ 1285 : enum : uint32_t {
+ 1286 : INPUT = CXXRTL_INPUT,
+ 1287 : OUTPUT = CXXRTL_OUTPUT,
+ 1288 : INOUT = CXXRTL_INOUT,
+ 1289 : DRIVEN_SYNC = CXXRTL_DRIVEN_SYNC,
+ 1290 : DRIVEN_COMB = CXXRTL_DRIVEN_COMB,
+ 1291 : UNDRIVEN = CXXRTL_UNDRIVEN,
+ 1292 : };
+ 1293 :
+ 1294 : debug_item(const ::cxxrtl_object &object) : cxxrtl_object(object) {}
+ 1295 :
+ 1296 : template<size_t Bits>
+ 1297 9 : debug_item(value<Bits> &item, size_t lsb_offset = 0, uint32_t flags_ = 0) {
+ 1298 : static_assert(Bits == 0 || sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
+ 1299 : "value<Bits> is not compatible with C layout");
+ 1300 9 : type = VALUE;
+ 1301 9 : flags = flags_;
+ 1302 9 : width = Bits;
+ 1303 9 : lsb_at = lsb_offset;
+ 1304 9 : depth = 1;
+ 1305 9 : zero_at = 0;
+ 1306 9 : curr = item.data;
+ 1307 9 : next = item.data;
+ 1308 9 : outline = nullptr;
+ 1309 9 : attrs = nullptr;
+ 1310 : }
+ 1311 :
+ 1312 : template<size_t Bits>
+ 1313 : debug_item(const value<Bits> &item, size_t lsb_offset = 0) {
+ 1314 : static_assert(Bits == 0 || sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
+ 1315 : "value<Bits> is not compatible with C layout");
+ 1316 : type = VALUE;
+ 1317 : flags = DRIVEN_COMB;
+ 1318 : width = Bits;
+ 1319 : lsb_at = lsb_offset;
+ 1320 : depth = 1;
+ 1321 : zero_at = 0;
+ 1322 : curr = const_cast<chunk_t*>(item.data);
+ 1323 : next = nullptr;
+ 1324 : outline = nullptr;
+ 1325 : attrs = nullptr;
+ 1326 : }
+ 1327 :
+ 1328 : template<size_t Bits>
+ 1329 : debug_item(wire<Bits> &item, size_t lsb_offset = 0, uint32_t flags_ = 0) {
+ 1330 : static_assert(Bits == 0 ||
+ 1331 : (sizeof(item.curr) == value<Bits>::chunks * sizeof(chunk_t) &&
+ 1332 : sizeof(item.next) == value<Bits>::chunks * sizeof(chunk_t)),
+ 1333 : "wire<Bits> is not compatible with C layout");
+ 1334 : type = WIRE;
+ 1335 : flags = flags_;
+ 1336 : width = Bits;
+ 1337 : lsb_at = lsb_offset;
+ 1338 : depth = 1;
+ 1339 : zero_at = 0;
+ 1340 : curr = item.curr.data;
+ 1341 : next = item.next.data;
+ 1342 : outline = nullptr;
+ 1343 : attrs = nullptr;
+ 1344 : }
+ 1345 :
+ 1346 : template<size_t Width>
+ 1347 : debug_item(memory<Width> &item, size_t zero_offset = 0) {
+ 1348 : static_assert(Width == 0 || sizeof(item.data[0]) == value<Width>::chunks * sizeof(chunk_t),
+ 1349 : "memory<Width> is not compatible with C layout");
+ 1350 : type = MEMORY;
+ 1351 : flags = 0;
+ 1352 : width = Width;
+ 1353 : lsb_at = 0;
+ 1354 : depth = item.depth;
+ 1355 : zero_at = zero_offset;
+ 1356 : curr = item.data ? item.data[0].data : nullptr;
+ 1357 : next = nullptr;
+ 1358 : outline = nullptr;
+ 1359 : attrs = nullptr;
+ 1360 : }
+ 1361 :
+ 1362 : template<size_t Bits>
+ 1363 : debug_item(debug_alias, const value<Bits> &item, size_t lsb_offset = 0) {
+ 1364 : static_assert(Bits == 0 || sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
+ 1365 : "value<Bits> is not compatible with C layout");
+ 1366 : type = ALIAS;
+ 1367 : flags = DRIVEN_COMB;
+ 1368 : width = Bits;
+ 1369 : lsb_at = lsb_offset;
+ 1370 : depth = 1;
+ 1371 : zero_at = 0;
+ 1372 : curr = const_cast<chunk_t*>(item.data);
+ 1373 : next = nullptr;
+ 1374 : outline = nullptr;
+ 1375 : attrs = nullptr;
+ 1376 : }
+ 1377 :
+ 1378 : template<size_t Bits>
+ 1379 : debug_item(debug_alias, const wire<Bits> &item, size_t lsb_offset = 0) {
+ 1380 : static_assert(Bits == 0 ||
+ 1381 : (sizeof(item.curr) == value<Bits>::chunks * sizeof(chunk_t) &&
+ 1382 : sizeof(item.next) == value<Bits>::chunks * sizeof(chunk_t)),
+ 1383 : "wire<Bits> is not compatible with C layout");
+ 1384 : type = ALIAS;
+ 1385 : flags = DRIVEN_COMB;
+ 1386 : width = Bits;
+ 1387 : lsb_at = lsb_offset;
+ 1388 : depth = 1;
+ 1389 : zero_at = 0;
+ 1390 : curr = const_cast<chunk_t*>(item.curr.data);
+ 1391 : next = nullptr;
+ 1392 : outline = nullptr;
+ 1393 : attrs = nullptr;
+ 1394 : }
+ 1395 :
+ 1396 : template<size_t Bits>
+ 1397 : debug_item(debug_outline &group, const value<Bits> &item, size_t lsb_offset = 0) {
+ 1398 : static_assert(Bits == 0 || sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
+ 1399 : "value<Bits> is not compatible with C layout");
+ 1400 : type = OUTLINE;
+ 1401 : flags = DRIVEN_COMB;
+ 1402 : width = Bits;
+ 1403 : lsb_at = lsb_offset;
+ 1404 : depth = 1;
+ 1405 : zero_at = 0;
+ 1406 : curr = const_cast<chunk_t*>(item.data);
+ 1407 : next = nullptr;
+ 1408 : outline = &group;
+ 1409 : attrs = nullptr;
+ 1410 : }
+ 1411 :
+ 1412 : template<size_t Bits, class IntegerT>
+ 1413 : IntegerT get() const {
+ 1414 : assert(width == Bits && depth == 1);
+ 1415 : value<Bits> item;
+ 1416 : std::copy(curr, curr + value<Bits>::chunks, item.data);
+ 1417 : return item.template get<IntegerT>();
+ 1418 : }
+ 1419 :
+ 1420 : template<size_t Bits, class IntegerT>
+ 1421 : void set(IntegerT other) const {
+ 1422 : assert(width == Bits && depth == 1);
+ 1423 : value<Bits> item;
+ 1424 : item.template set<IntegerT>(other);
+ 1425 : std::copy(item.data, item.data + value<Bits>::chunks, next);
+ 1426 : }
+ 1427 : };
+ 1428 : static_assert(std::is_standard_layout<debug_item>::value, "debug_item is not compatible with C layout");
+ 1429 :
+ 1430 : } // namespace cxxrtl
+ 1431 :
+ 1432 9 : typedef struct _cxxrtl_attr_set {
+ 1433 : cxxrtl::metadata_map map;
+ 1434 : } *cxxrtl_attr_set;
+ 1435 :
+ 1436 : namespace cxxrtl {
+ 1437 :
+ 1438 : // Representation of an attribute set in the C++ interface.
+ 1439 : using debug_attrs = ::_cxxrtl_attr_set;
+ 1440 :
+ 1441 3 : struct debug_items {
+ 1442 : // Debug items may be composed of multiple parts, but the attributes are shared between all of them.
+            1443                 : // There are additional invariants, not all of which are checked by this code:
+ 1444 : // - Memories and non-memories cannot be mixed together.
+ 1445 : // - Bit indices (considering `lsb_at` and `width`) must not overlap.
+ 1446 : // - Row indices (considering `depth` and `zero_at`) must be the same.
+ 1447 : // - The `INPUT` and `OUTPUT` flags must be the same for all parts.
+            1448                 : // Other than that, the parts can be quite different; e.g. it is OK to mix a value, a wire, an alias,
+            1449                 : // and an outline in the debug information for a single name, split across four parts.
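+                                  : //
+                                  : // Illustrative usage (editorial sketch, not part of this header): one name added in two parts that
+                                  : // share a single attribute set. `dut.p_sig_lo` and `dut.p_sig_hi` are hypothetical members of a
+                                  : // generated module.
+                                  : //
+                                  : //   cxxrtl::debug_items items;
+                                  : //   items.add("top sig", cxxrtl::debug_item(dut.p_sig_lo, /*lsb_offset=*/0)); // bits [3:0]
+                                  : //   items.add("top sig", cxxrtl::debug_item(dut.p_sig_hi, /*lsb_offset=*/4)); // bits [7:4]
+                                  : //   assert(items.count("top sig") == 2);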
+ 1450 : std::map<std::string, std::vector<debug_item>> table;
+ 1451 : std::map<std::string, std::unique_ptr<debug_attrs>> attrs_table;
+ 1452 :
+ 1453 9 : void add(const std::string &path, debug_item &&item, metadata_map &&item_attrs = {}) {
+            1454               9 : assert((path.empty() || path[path.size() - 1] != ' ') && path.find("  ") == std::string::npos);
+ 1455 9 : std::unique_ptr<debug_attrs> &attrs = attrs_table[path];
+ 1456 9 : if (attrs.get() == nullptr)
+ 1457 9 : attrs = std::unique_ptr<debug_attrs>(new debug_attrs);
+ 1458 18 : for (auto attr : item_attrs)
+ 1459 18 : attrs->map.insert(attr);
+ 1460 9 : item.attrs = attrs.get();
+ 1461 9 : std::vector<debug_item> &parts = table[path];
+ 1462 9 : parts.emplace_back(item);
+ 1463 9 : std::sort(parts.begin(), parts.end(),
+ 1464 0 : [](const debug_item &a, const debug_item &b) {
+ 1465 0 : return a.lsb_at < b.lsb_at;
+ 1466 : });
+ 1467 9 : }
+ 1468 :
+ 1469 : // This overload exists to reduce excessive stack slot allocation in `CXXRTL_EXTREMELY_COLD void debug_info()`.
+ 1470 : template<class... T>
+ 1471 9 : void add(const std::string &base_path, const char *path, const char *serialized_item_attrs, T&&... args) {
+ 1472 18 : add(base_path + path, debug_item(std::forward<T>(args)...), metadata::deserialize(serialized_item_attrs));
+ 1473 9 : }
+ 1474 :
+ 1475 : size_t count(const std::string &path) const {
+ 1476 : if (table.count(path) == 0)
+ 1477 : return 0;
+ 1478 : return table.at(path).size();
+ 1479 : }
+ 1480 :
+ 1481 : const std::vector<debug_item> &at(const std::string &path) const {
+ 1482 : return table.at(path);
+ 1483 : }
+ 1484 :
+ 1485 : // Like `at()`, but operates only on single-part debug items.
+ 1486 : const debug_item &operator [](const std::string &path) const {
+ 1487 : const std::vector<debug_item> &parts = table.at(path);
+ 1488 : assert(parts.size() == 1);
+ 1489 : return parts.at(0);
+ 1490 : }
+ 1491 :
+ 1492 : bool is_memory(const std::string &path) const {
+ 1493 : return at(path).at(0).type == debug_item::MEMORY;
+ 1494 : }
+ 1495 :
+ 1496 : const metadata_map &attrs(const std::string &path) const {
+ 1497 : return attrs_table.at(path)->map;
+ 1498 : }
+ 1499 : };
+ 1500 :
+ 1501 : // Only `module` scopes are defined. The type is implicit, since Yosys does not currently support
+ 1502 : // any other scope types.
+ 1503 3 : struct debug_scope {
+ 1504 : std::string module_name;
+ 1505 : std::unique_ptr<debug_attrs> module_attrs;
+ 1506 : std::unique_ptr<debug_attrs> cell_attrs;
+ 1507 : };
+ 1508 :
+ 1509 : struct debug_scopes {
+ 1510 : std::map<std::string, debug_scope> table;
+ 1511 :
+ 1512 0 : void add(const std::string &path, const std::string &module_name, metadata_map &&module_attrs, metadata_map &&cell_attrs) {
+            1513               0 : assert((path.empty() || path[path.size() - 1] != ' ') && path.find("  ") == std::string::npos);
+ 1514 0 : assert(table.count(path) == 0);
+ 1515 0 : debug_scope &scope = table[path];
+ 1516 0 : scope.module_name = module_name;
+ 1517 0 : scope.module_attrs = std::unique_ptr<debug_attrs>(new debug_attrs { module_attrs });
+ 1518 0 : scope.cell_attrs = std::unique_ptr<debug_attrs>(new debug_attrs { cell_attrs });
+ 1519 0 : }
+ 1520 :
+ 1521 : // This overload exists to reduce excessive stack slot allocation in `CXXRTL_EXTREMELY_COLD void debug_info()`.
+ 1522 : void add(const std::string &base_path, const char *path, const char *module_name, const char *serialized_module_attrs, const char *serialized_cell_attrs) {
+ 1523 : add(base_path + path, module_name, metadata::deserialize(serialized_module_attrs), metadata::deserialize(serialized_cell_attrs));
+ 1524 : }
+ 1525 :
+ 1526 : size_t contains(const std::string &path) const {
+ 1527 : return table.count(path);
+ 1528 : }
+ 1529 :
+ 1530 : const debug_scope &operator [](const std::string &path) const {
+ 1531 : return table.at(path);
+ 1532 : }
+ 1533 : };
+ 1534 :
+            1535                 : // Tag class to disambiguate between the default constructor, which is used by the toplevel module and
+            1536                 : // calls `reset()`, and the constructor of interior modules, which should not call it.
+ 1537 : struct interior {};
+ 1538 :
+ 1539 : // The core API of the `module` class consists of only four virtual methods: `reset()`, `eval()`,
+            1540                 : // `commit()`, and `debug_info()`. (The virtual destructor is made necessary by C++.) Every other method
+            1541                 : // is a convenience method that exists solely to simplify some common pattern for C++ API consumers.
+            1542                 : // No behavior may be added to such convenience methods that other parts of CXXRTL can rely on, since
+            1543                 : // there is no guarantee they will be called; for example, other CXXRTL libraries will often call `eval()`
+            1544                 : // and `commit()` directly instead, and those are also the methods exposed through the C API.
+ 1545 : struct module {
+ 1546 3 : module() {}
+ 1547 3 : virtual ~module() {}
+ 1548 :
+ 1549 : // Modules with black boxes cannot be copied. Although not all designs include black boxes,
+ 1550 : // delete the copy constructor and copy assignment operator to make sure that any downstream
+ 1551 : // code that manipulates modules doesn't accidentally depend on their availability.
+ 1552 : module(const module &) = delete;
+ 1553 : module &operator=(const module &) = delete;
+ 1554 :
+ 1555 : module(module &&) = default;
+ 1556 : module &operator=(module &&) = default;
+ 1557 :
+ 1558 : virtual void reset() = 0;
+ 1559 :
+            1560                 : // The `eval()` callback object, `performer`, is included in the virtual call signature since passing
+            1561                 : // it there leaves the performance of the generated code broadly unchanged.
+ 1562 : virtual bool eval(performer *performer = nullptr) = 0;
+ 1563 :
+ 1564 : // The `commit()` callback object, `observer`, is not included in the virtual call signature since
+ 1565 : // the generated code is severely pessimized by it. To observe commit events, the non-virtual
+ 1566 : // `commit(observer *)` overload must be called directly on a `module` subclass.
+ 1567 : virtual bool commit() = 0;
+ 1568 :
+ 1569 33 : size_t step(performer *performer = nullptr) {
+ 1570 33 : size_t deltas = 0;
+ 1571 33 : bool converged = false;
+ 1572 33 : do {
+ 1573 33 : converged = eval(performer);
+ 1574 33 : deltas++;
+ 1575 66 : } while (commit() && !converged);
+ 1576 33 : return deltas;
+ 1577 : }
+ 1578 :
+ 1579 : virtual void debug_info(debug_items *items, debug_scopes *scopes, std::string path, metadata_map &&cell_attrs = {}) {
+ 1580 : (void)items, (void)scopes, (void)path, (void)cell_attrs;
+ 1581 : }
+ 1582 :
+ 1583 : // Compatibility method.
+ 1584 : #if __has_attribute(deprecated)
+            1585                 : __attribute__((deprecated("Use `debug_info(&items, /*scopes=*/nullptr, path);` instead. (`path` could be \"top \".)")))
+ 1586 : #endif
+ 1587 : void debug_info(debug_items &items, std::string path) {
+ 1588 : debug_info(&items, /*scopes=*/nullptr, path);
+ 1589 : }
+ 1590 : };
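+                                  :
+                                  : // Illustrative usage (editorial sketch, not part of this header): a minimal simulation loop driving
+                                  : // a generated design. `cxxrtl_design::p_top` and its member `p_clk` are hypothetical names for a
+                                  : // design produced by `write_cxxrtl`; only `module::step()` and `value<>::set()` are API from this file.
+                                  : //
+                                  : //   cxxrtl_design::p_top top;                    // the toplevel default constructor calls reset()
+                                  : //   for (int cycle = 0; cycle < 1000; cycle++) {
+                                  : //     top.p_clk.set<bool>(false); top.step();
+                                  : //     top.p_clk.set<bool>(true);  top.step();
+                                  : //   }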
+ 1591 :
+ 1592 : } // namespace cxxrtl
+ 1593 :
+ 1594 : // Internal structures used to communicate with the implementation of the C interface.
+ 1595 :
+ 1596 : typedef struct _cxxrtl_toplevel {
+ 1597 : std::unique_ptr<cxxrtl::module> module;
+ 1598 : } *cxxrtl_toplevel;
+ 1599 :
+ 1600 : typedef struct _cxxrtl_outline {
+ 1601 : std::function<void()> eval;
+ 1602 : } *cxxrtl_outline;
+ 1603 :
+ 1604 : // Definitions of internal Yosys cells. Other than the functions in this namespace, CXXRTL is fully generic
+            1605                 : // and independent of Yosys implementation details.
+ 1606 : //
+ 1607 : // The `write_cxxrtl` pass translates internal cells (cells with names that start with `$`) to calls of these
+            1608                 : // functions. All Yosys arithmetic and logical cells perform sign or zero extension on their operands,
+            1609                 : // whereas the basic operations on arbitrary-width values require operands of the same width. These functions
+            1610                 : // bridge the gap by performing the necessary casts. They are named similarly to `cell_A[B]`, where A and B are `u`
+            1611                 : // if the corresponding operand is unsigned, and `s` if it is signed.
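+                                  : //
+                                  : // For example (editorial illustration), a `$add` cell with two signed operands and a 10-bit result is
+                                  : // translated to a call like the following, which sign-extends both operands to 10 bits before performing
+                                  : // the width-matched addition:
+                                  : //
+                                  : //   value<8> a; value<4> b;                        // filled in by the surrounding generated code
+                                  : //   value<10> y = cxxrtl_yosys::add_ss<10>(a, b);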
+ 1612 : namespace cxxrtl_yosys {
+ 1613 :
+ 1614 : using namespace cxxrtl;
+ 1615 :
+ 1616 : // std::max isn't constexpr until C++14 for no particular reason (it's an oversight), so we define our own.
+ 1617 : template<class T>
+ 1618 : CXXRTL_ALWAYS_INLINE
+ 1619 : constexpr T max(const T &a, const T &b) {
+ 1620 : return a > b ? a : b;
+ 1621 : }
+ 1622 :
+ 1623 : // Logic operations
+ 1624 : template<size_t BitsY, size_t BitsA>
+ 1625 : CXXRTL_ALWAYS_INLINE
+ 1626 : value<BitsY> logic_not(const value<BitsA> &a) {
+ 1627 : return value<BitsY> { a ? 0u : 1u };
+ 1628 : }
+ 1629 :
+ 1630 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1631 : CXXRTL_ALWAYS_INLINE
+ 1632 : value<BitsY> logic_and(const value<BitsA> &a, const value<BitsB> &b) {
+ 1633 : return value<BitsY> { (bool(a) && bool(b)) ? 1u : 0u };
+ 1634 : }
+ 1635 :
+ 1636 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1637 : CXXRTL_ALWAYS_INLINE
+ 1638 : value<BitsY> logic_or(const value<BitsA> &a, const value<BitsB> &b) {
+ 1639 : return value<BitsY> { (bool(a) || bool(b)) ? 1u : 0u };
+ 1640 : }
+ 1641 :
+ 1642 : // Reduction operations
+ 1643 : template<size_t BitsY, size_t BitsA>
+ 1644 : CXXRTL_ALWAYS_INLINE
+ 1645 : value<BitsY> reduce_and(const value<BitsA> &a) {
+ 1646 : return value<BitsY> { a.bit_not().is_zero() ? 1u : 0u };
+ 1647 : }
+ 1648 :
+ 1649 : template<size_t BitsY, size_t BitsA>
+ 1650 : CXXRTL_ALWAYS_INLINE
+ 1651 : value<BitsY> reduce_or(const value<BitsA> &a) {
+ 1652 : return value<BitsY> { a ? 1u : 0u };
+ 1653 : }
+ 1654 :
+ 1655 : template<size_t BitsY, size_t BitsA>
+ 1656 : CXXRTL_ALWAYS_INLINE
+ 1657 : value<BitsY> reduce_xor(const value<BitsA> &a) {
+ 1658 : return value<BitsY> { (a.ctpop() % 2) ? 1u : 0u };
+ 1659 : }
+ 1660 :
+ 1661 : template<size_t BitsY, size_t BitsA>
+ 1662 : CXXRTL_ALWAYS_INLINE
+ 1663 : value<BitsY> reduce_xnor(const value<BitsA> &a) {
+ 1664 : return value<BitsY> { (a.ctpop() % 2) ? 0u : 1u };
+ 1665 : }
+ 1666 :
+ 1667 : template<size_t BitsY, size_t BitsA>
+ 1668 : CXXRTL_ALWAYS_INLINE
+ 1669 : value<BitsY> reduce_bool(const value<BitsA> &a) {
+ 1670 : return value<BitsY> { a ? 1u : 0u };
+ 1671 : }
+ 1672 :
+ 1673 : // Bitwise operations
+ 1674 : template<size_t BitsY, size_t BitsA>
+ 1675 : CXXRTL_ALWAYS_INLINE
+ 1676 : value<BitsY> not_u(const value<BitsA> &a) {
+ 1677 : return a.template zcast<BitsY>().bit_not();
+ 1678 : }
+ 1679 :
+ 1680 : template<size_t BitsY, size_t BitsA>
+ 1681 : CXXRTL_ALWAYS_INLINE
+ 1682 : value<BitsY> not_s(const value<BitsA> &a) {
+ 1683 : return a.template scast<BitsY>().bit_not();
+ 1684 : }
+ 1685 :
+ 1686 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1687 : CXXRTL_ALWAYS_INLINE
+ 1688 : value<BitsY> and_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1689 : return a.template zcast<BitsY>().bit_and(b.template zcast<BitsY>());
+ 1690 : }
+ 1691 :
+ 1692 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1693 : CXXRTL_ALWAYS_INLINE
+ 1694 : value<BitsY> and_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1695 : return a.template scast<BitsY>().bit_and(b.template scast<BitsY>());
+ 1696 : }
+ 1697 :
+ 1698 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1699 : CXXRTL_ALWAYS_INLINE
+ 1700 : value<BitsY> or_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1701 : return a.template zcast<BitsY>().bit_or(b.template zcast<BitsY>());
+ 1702 : }
+ 1703 :
+ 1704 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1705 : CXXRTL_ALWAYS_INLINE
+ 1706 : value<BitsY> or_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1707 : return a.template scast<BitsY>().bit_or(b.template scast<BitsY>());
+ 1708 : }
+ 1709 :
+ 1710 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1711 : CXXRTL_ALWAYS_INLINE
+ 1712 : value<BitsY> xor_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1713 : return a.template zcast<BitsY>().bit_xor(b.template zcast<BitsY>());
+ 1714 : }
+ 1715 :
+ 1716 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1717 : CXXRTL_ALWAYS_INLINE
+ 1718 : value<BitsY> xor_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1719 : return a.template scast<BitsY>().bit_xor(b.template scast<BitsY>());
+ 1720 : }
+ 1721 :
+ 1722 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1723 : CXXRTL_ALWAYS_INLINE
+ 1724 : value<BitsY> xnor_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1725 : return a.template zcast<BitsY>().bit_xor(b.template zcast<BitsY>()).bit_not();
+ 1726 : }
+ 1727 :
+ 1728 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1729 : CXXRTL_ALWAYS_INLINE
+ 1730 : value<BitsY> xnor_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1731 : return a.template scast<BitsY>().bit_xor(b.template scast<BitsY>()).bit_not();
+ 1732 : }
+ 1733 :
+ 1734 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1735 : CXXRTL_ALWAYS_INLINE
+ 1736 : value<BitsY> shl_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1737 : return a.template zcast<BitsY>().shl(b);
+ 1738 : }
+ 1739 :
+ 1740 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1741 : CXXRTL_ALWAYS_INLINE
+ 1742 : value<BitsY> shl_su(const value<BitsA> &a, const value<BitsB> &b) {
+ 1743 : return a.template scast<BitsY>().shl(b);
+ 1744 : }
+ 1745 :
+ 1746 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1747 : CXXRTL_ALWAYS_INLINE
+ 1748 : value<BitsY> sshl_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1749 : return a.template zcast<BitsY>().shl(b);
+ 1750 : }
+ 1751 :
+ 1752 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1753 : CXXRTL_ALWAYS_INLINE
+ 1754 : value<BitsY> sshl_su(const value<BitsA> &a, const value<BitsB> &b) {
+ 1755 : return a.template scast<BitsY>().shl(b);
+ 1756 : }
+ 1757 :
+ 1758 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1759 : CXXRTL_ALWAYS_INLINE
+ 1760 : value<BitsY> shr_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1761 : return a.shr(b).template zcast<BitsY>();
+ 1762 : }
+ 1763 :
+ 1764 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1765 : CXXRTL_ALWAYS_INLINE
+ 1766 : value<BitsY> shr_su(const value<BitsA> &a, const value<BitsB> &b) {
+ 1767 : return a.shr(b).template scast<BitsY>();
+ 1768 : }
+ 1769 :
+ 1770 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1771 : CXXRTL_ALWAYS_INLINE
+ 1772 : value<BitsY> sshr_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1773 : return a.shr(b).template zcast<BitsY>();
+ 1774 : }
+ 1775 :
+ 1776 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1777 : CXXRTL_ALWAYS_INLINE
+ 1778 : value<BitsY> sshr_su(const value<BitsA> &a, const value<BitsB> &b) {
+ 1779 : return a.sshr(b).template scast<BitsY>();
+ 1780 : }
+ 1781 :
+ 1782 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1783 : CXXRTL_ALWAYS_INLINE
+ 1784 : value<BitsY> shift_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1785 : return shr_uu<BitsY>(a, b);
+ 1786 : }
+ 1787 :
+ 1788 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1789 : CXXRTL_ALWAYS_INLINE
+ 1790 : value<BitsY> shift_su(const value<BitsA> &a, const value<BitsB> &b) {
+ 1791 : return shr_su<BitsY>(a, b);
+ 1792 : }
+ 1793 :
+ 1794 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1795 : CXXRTL_ALWAYS_INLINE
+ 1796 : value<BitsY> shift_us(const value<BitsA> &a, const value<BitsB> &b) {
+ 1797 : return b.is_neg() ? shl_uu<BitsY>(a, b.template sext<BitsB + 1>().neg()) : shr_uu<BitsY>(a, b);
+ 1798 : }
+ 1799 :
+ 1800 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1801 : CXXRTL_ALWAYS_INLINE
+ 1802 : value<BitsY> shift_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1803 : return b.is_neg() ? shl_su<BitsY>(a, b.template sext<BitsB + 1>().neg()) : shr_su<BitsY>(a, b);
+ 1804 : }
+ 1805 :
+ 1806 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1807 : CXXRTL_ALWAYS_INLINE
+ 1808 : value<BitsY> shiftx_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1809 : return shift_uu<BitsY>(a, b);
+ 1810 : }
+ 1811 :
+ 1812 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1813 : CXXRTL_ALWAYS_INLINE
+ 1814 : value<BitsY> shiftx_su(const value<BitsA> &a, const value<BitsB> &b) {
+ 1815 : return shift_su<BitsY>(a, b);
+ 1816 : }
+ 1817 :
+ 1818 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1819 : CXXRTL_ALWAYS_INLINE
+ 1820 : value<BitsY> shiftx_us(const value<BitsA> &a, const value<BitsB> &b) {
+ 1821 : return shift_us<BitsY>(a, b);
+ 1822 : }
+ 1823 :
+ 1824 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1825 : CXXRTL_ALWAYS_INLINE
+ 1826 : value<BitsY> shiftx_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1827 : return shift_ss<BitsY>(a, b);
+ 1828 : }
+ 1829 :
+ 1830 : // Comparison operations
+ 1831 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1832 : CXXRTL_ALWAYS_INLINE
+ 1833 : value<BitsY> eq_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1834 : constexpr size_t BitsExt = max(BitsA, BitsB);
+ 1835 : return value<BitsY>{ a.template zext<BitsExt>() == b.template zext<BitsExt>() ? 1u : 0u };
+ 1836 : }
+ 1837 :
+ 1838 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1839 : CXXRTL_ALWAYS_INLINE
+ 1840 : value<BitsY> eq_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1841 : constexpr size_t BitsExt = max(BitsA, BitsB);
+ 1842 : return value<BitsY>{ a.template sext<BitsExt>() == b.template sext<BitsExt>() ? 1u : 0u };
+ 1843 : }
+ 1844 :
+ 1845 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1846 : CXXRTL_ALWAYS_INLINE
+ 1847 : value<BitsY> ne_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1848 : constexpr size_t BitsExt = max(BitsA, BitsB);
+ 1849 : return value<BitsY>{ a.template zext<BitsExt>() != b.template zext<BitsExt>() ? 1u : 0u };
+ 1850 : }
+ 1851 :
+ 1852 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1853 : CXXRTL_ALWAYS_INLINE
+ 1854 : value<BitsY> ne_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1855 : constexpr size_t BitsExt = max(BitsA, BitsB);
+ 1856 : return value<BitsY>{ a.template sext<BitsExt>() != b.template sext<BitsExt>() ? 1u : 0u };
+ 1857 : }
+ 1858 :
+ 1859 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1860 : CXXRTL_ALWAYS_INLINE
+ 1861 : value<BitsY> eqx_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1862 : return eq_uu<BitsY>(a, b);
+ 1863 : }
+ 1864 :
+ 1865 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1866 : CXXRTL_ALWAYS_INLINE
+ 1867 : value<BitsY> eqx_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1868 : return eq_ss<BitsY>(a, b);
+ 1869 : }
+ 1870 :
+ 1871 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1872 : CXXRTL_ALWAYS_INLINE
+ 1873 : value<BitsY> nex_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1874 : return ne_uu<BitsY>(a, b);
+ 1875 : }
+ 1876 :
+ 1877 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1878 : CXXRTL_ALWAYS_INLINE
+ 1879 : value<BitsY> nex_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1880 : return ne_ss<BitsY>(a, b);
+ 1881 : }
+ 1882 :
+ 1883 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1884 : CXXRTL_ALWAYS_INLINE
+ 1885 : value<BitsY> gt_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1886 : constexpr size_t BitsExt = max(BitsA, BitsB);
+ 1887 : return value<BitsY> { b.template zext<BitsExt>().ucmp(a.template zext<BitsExt>()) ? 1u : 0u };
+ 1888 : }
+ 1889 :
+ 1890 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1891 : CXXRTL_ALWAYS_INLINE
+ 1892 : value<BitsY> gt_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1893 : constexpr size_t BitsExt = max(BitsA, BitsB);
+ 1894 : return value<BitsY> { b.template sext<BitsExt>().scmp(a.template sext<BitsExt>()) ? 1u : 0u };
+ 1895 : }
+ 1896 :
+ 1897 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1898 : CXXRTL_ALWAYS_INLINE
+ 1899 : value<BitsY> ge_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1900 : constexpr size_t BitsExt = max(BitsA, BitsB);
+ 1901 : return value<BitsY> { !a.template zext<BitsExt>().ucmp(b.template zext<BitsExt>()) ? 1u : 0u };
+ 1902 : }
+ 1903 :
+ 1904 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1905 : CXXRTL_ALWAYS_INLINE
+ 1906 : value<BitsY> ge_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1907 : constexpr size_t BitsExt = max(BitsA, BitsB);
+ 1908 : return value<BitsY> { !a.template sext<BitsExt>().scmp(b.template sext<BitsExt>()) ? 1u : 0u };
+ 1909 : }
+ 1910 :
+ 1911 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1912 : CXXRTL_ALWAYS_INLINE
+ 1913 : value<BitsY> lt_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1914 : constexpr size_t BitsExt = max(BitsA, BitsB);
+ 1915 : return value<BitsY> { a.template zext<BitsExt>().ucmp(b.template zext<BitsExt>()) ? 1u : 0u };
+ 1916 : }
+ 1917 :
+ 1918 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1919 : CXXRTL_ALWAYS_INLINE
+ 1920 : value<BitsY> lt_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1921 : constexpr size_t BitsExt = max(BitsA, BitsB);
+ 1922 : return value<BitsY> { a.template sext<BitsExt>().scmp(b.template sext<BitsExt>()) ? 1u : 0u };
+ 1923 : }
+ 1924 :
+ 1925 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1926 : CXXRTL_ALWAYS_INLINE
+ 1927 : value<BitsY> le_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1928 : constexpr size_t BitsExt = max(BitsA, BitsB);
+ 1929 : return value<BitsY> { !b.template zext<BitsExt>().ucmp(a.template zext<BitsExt>()) ? 1u : 0u };
+ 1930 : }
+ 1931 :
+ 1932 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1933 : CXXRTL_ALWAYS_INLINE
+ 1934 : value<BitsY> le_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1935 : constexpr size_t BitsExt = max(BitsA, BitsB);
+ 1936 : return value<BitsY> { !b.template sext<BitsExt>().scmp(a.template sext<BitsExt>()) ? 1u : 0u };
+ 1937 : }
+ 1938 :
+ 1939 : // Arithmetic operations
+ 1940 : template<size_t BitsY, size_t BitsA>
+ 1941 : CXXRTL_ALWAYS_INLINE
+ 1942 : value<BitsY> pos_u(const value<BitsA> &a) {
+ 1943 : return a.template zcast<BitsY>();
+ 1944 : }
+ 1945 :
+ 1946 : template<size_t BitsY, size_t BitsA>
+ 1947 : CXXRTL_ALWAYS_INLINE
+ 1948 : value<BitsY> pos_s(const value<BitsA> &a) {
+ 1949 : return a.template scast<BitsY>();
+ 1950 : }
+ 1951 :
+ 1952 : template<size_t BitsY, size_t BitsA>
+ 1953 : CXXRTL_ALWAYS_INLINE
+ 1954 : value<BitsY> neg_u(const value<BitsA> &a) {
+ 1955 : return a.template zcast<BitsY>().neg();
+ 1956 : }
+ 1957 :
+ 1958 : template<size_t BitsY, size_t BitsA>
+ 1959 : CXXRTL_ALWAYS_INLINE
+ 1960 : value<BitsY> neg_s(const value<BitsA> &a) {
+ 1961 : return a.template scast<BitsY>().neg();
+ 1962 : }
+ 1963 :
+ 1964 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1965 : CXXRTL_ALWAYS_INLINE
+ 1966 33 : value<BitsY> add_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1967 33 : return a.template zcast<BitsY>().add(b.template zcast<BitsY>());
+ 1968 : }
+ 1969 :
+ 1970 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1971 : CXXRTL_ALWAYS_INLINE
+ 1972 : value<BitsY> add_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1973 : return a.template scast<BitsY>().add(b.template scast<BitsY>());
+ 1974 : }
+ 1975 :
+ 1976 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1977 : CXXRTL_ALWAYS_INLINE
+ 1978 : value<BitsY> sub_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1979 : return a.template zcast<BitsY>().sub(b.template zcast<BitsY>());
+ 1980 : }
+ 1981 :
+ 1982 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1983 : CXXRTL_ALWAYS_INLINE
+ 1984 : value<BitsY> sub_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1985 : return a.template scast<BitsY>().sub(b.template scast<BitsY>());
+ 1986 : }
+ 1987 :
+ 1988 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1989 : CXXRTL_ALWAYS_INLINE
+ 1990 : value<BitsY> mul_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 1991 : constexpr size_t BitsM = BitsA >= BitsB ? BitsA : BitsB;
+ 1992 : return a.template zcast<BitsM>().template mul<BitsY>(b.template zcast<BitsM>());
+ 1993 : }
+ 1994 :
+ 1995 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 1996 : CXXRTL_ALWAYS_INLINE
+ 1997 : value<BitsY> mul_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 1998 : return a.template scast<BitsY>().template mul<BitsY>(b.template scast<BitsY>());
+ 1999 : }
+ 2000 :
+ 2001 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 2002 : CXXRTL_ALWAYS_INLINE
+ 2003 : std::pair<value<BitsY>, value<BitsY>> divmod_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 2004 : constexpr size_t Bits = max(BitsY, max(BitsA, BitsB));
+ 2005 : value<Bits> quotient;
+ 2006 : value<Bits> remainder;
+ 2007 : value<Bits> dividend = a.template zext<Bits>();
+ 2008 : value<Bits> divisor = b.template zext<Bits>();
+ 2009 : std::tie(quotient, remainder) = dividend.udivmod(divisor);
+ 2010 : return {quotient.template trunc<BitsY>(), remainder.template trunc<BitsY>()};
+ 2011 : }
+ 2012 :
+ 2013 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 2014 : CXXRTL_ALWAYS_INLINE
+ 2015 : std::pair<value<BitsY>, value<BitsY>> divmod_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 2016 : constexpr size_t Bits = max(BitsY, max(BitsA, BitsB));
+ 2017 : value<Bits> quotient;
+ 2018 : value<Bits> remainder;
+ 2019 : value<Bits> dividend = a.template sext<Bits>();
+ 2020 : value<Bits> divisor = b.template sext<Bits>();
+ 2021 : std::tie(quotient, remainder) = dividend.sdivmod(divisor);
+ 2022 : return {quotient.template trunc<BitsY>(), remainder.template trunc<BitsY>()};
+ 2023 : }
+ 2024 :
+ 2025 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 2026 : CXXRTL_ALWAYS_INLINE
+ 2027 : value<BitsY> div_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 2028 : return divmod_uu<BitsY>(a, b).first;
+ 2029 : }
+ 2030 :
+ 2031 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 2032 : CXXRTL_ALWAYS_INLINE
+ 2033 : value<BitsY> div_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 2034 : return divmod_ss<BitsY>(a, b).first;
+ 2035 : }
+ 2036 :
+ 2037 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 2038 : CXXRTL_ALWAYS_INLINE
+ 2039 : value<BitsY> mod_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 2040 : return divmod_uu<BitsY>(a, b).second;
+ 2041 : }
+ 2042 :
+ 2043 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 2044 : CXXRTL_ALWAYS_INLINE
+ 2045 : value<BitsY> mod_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 2046 : return divmod_ss<BitsY>(a, b).second;
+ 2047 : }
+ 2048 :
+ 2049 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 2050 : CXXRTL_ALWAYS_INLINE
+ 2051 : value<BitsY> modfloor_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 2052 : return divmod_uu<BitsY>(a, b).second;
+ 2053 : }
+ 2054 :
+            2055                 : // GHDL modfloor operator. Returns r = a mod b, such that r has the same sign as b and
+            2056                 : // a = b*N + r for some integer N.
+            2057                 : // In practical terms: when a and b have different signs and the remainder returned by `divmod_ss`
+            2058                 : // is not 0, the result is that remainder plus b.
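+                                  : //
+                                  : // Worked example (editorial): a = -7, b = 3. `divmod_ss` gives quotient -2 and remainder -1; since the
+                                  : // signs differ and the remainder is non-zero, the result is -1 + 3 = 2 (same sign as b), whereas
+                                  : // `mod_ss` would return -1.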
+ 2059 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 2060 : CXXRTL_ALWAYS_INLINE
+ 2061 : value<BitsY> modfloor_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 2062 : value<BitsY> r;
+ 2063 : r = divmod_ss<BitsY>(a, b).second;
+ 2064 : if((b.is_neg() != a.is_neg()) && !r.is_zero())
+ 2065 : return add_ss<BitsY>(b, r);
+ 2066 : return r;
+ 2067 : }
+ 2068 :
+ 2069 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 2070 : CXXRTL_ALWAYS_INLINE
+ 2071 : value<BitsY> divfloor_uu(const value<BitsA> &a, const value<BitsB> &b) {
+ 2072 : return divmod_uu<BitsY>(a, b).first;
+ 2073 : }
+ 2074 :
+            2075                 : // Divfloor (floor division). Similar to above: returns q = a//b rounded towards negative infinity.
+            2076                 : // In other words, it returns truncating a/b, except that if a and b have different signs and the
+            2077                 : // remainder is non-zero, one more is subtracted to round towards the floor.
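+                                  : //
+                                  : // Worked example (editorial): a = -7, b = 3. Truncating division gives q = -2 with remainder -1; the
+                                  : // signs differ and the remainder is non-zero, so 1 is subtracted, giving q = -3 = floor(-7/3).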
+ 2078 : template<size_t BitsY, size_t BitsA, size_t BitsB>
+ 2079 : CXXRTL_ALWAYS_INLINE
+ 2080 : value<BitsY> divfloor_ss(const value<BitsA> &a, const value<BitsB> &b) {
+ 2081 : value<BitsY> q, r;
+ 2082 : std::tie(q, r) = divmod_ss<BitsY>(a, b);
+ 2083 : if ((b.is_neg() != a.is_neg()) && !r.is_zero())
+ 2084 : return sub_uu<BitsY>(q, value<1> { 1u });
+ 2085 : return q;
+ 2086 :
+ 2087 : }
+ 2088 :
+ 2089 : // Memory helper
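+                                  : //
+                                  : // Maps a bus address onto a row index of a memory declared with the given zero offset and depth, and
+                                  : // reports whether the address actually falls inside the memory. For instance (editorial illustration),
+                                  : // with offset = 2 and depth = 8, address 5 yields { valid = true, index = 3 }, while address 12 yields
+                                  : // valid = false (and the index should then be ignored).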
+ 2090 : struct memory_index {
+ 2091 : bool valid;
+ 2092 : size_t index;
+ 2093 :
+ 2094 : template<size_t BitsAddr>
+ 2095 : memory_index(const value<BitsAddr> &addr, size_t offset, size_t depth) {
+ 2096 : static_assert(value<BitsAddr>::chunks <= 1, "memory address is too wide");
+ 2097 : size_t offset_index = addr.data[0];
+ 2098 :
+ 2099 : valid = (offset_index >= offset && offset_index < offset + depth);
+ 2100 : index = offset_index - offset;
+ 2101 : }
+ 2102 : };
+ 2103 :
+ 2104 : } // namespace cxxrtl_yosys
+ 2105 :
+ 2106 : #endif
+