Skip to content

Commit

Permalink
fmt,cxxrtl: add UNICHAR format type.
Browse files Browse the repository at this point in the history
This format type is used to print a Unicode character (code point) as
its UTF-8 serialization. To this end, two UTF-8 encoders (one for fmt,
one for cxxrtl) are added for rendering. When converted to a Verilog
format specifier, `UNICHAR` degrades to `%c` with the low 7 bits of
the code point, which has equivalent behavior for inputs not exceeding
ASCII. (SystemVerilog leaves source and display encodings completely
undefined.)
  • Loading branch information
whitequark committed Mar 28, 2024
1 parent 54097ba commit f722ddf
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 6 deletions.
24 changes: 22 additions & 2 deletions backends/cxxrtl/runtime/cxxrtl/cxxrtl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1013,13 +1013,14 @@ struct fmt_part {
LITERAL = 0,
INTEGER = 1,
STRING = 2,
VLOG_TIME = 3,
UNICHAR = 3,
VLOG_TIME = 4,
} type;

// LITERAL type
std::string str;

// INTEGER/STRING types
// INTEGER/STRING/UNICHAR types
// + value<Bits> val;

// INTEGER/STRING/VLOG_TIME types
Expand Down Expand Up @@ -1073,6 +1074,25 @@ struct fmt_part {
break;
}

// UNICHAR: append the UTF-8 serialization of the code point held in `val` to `buf`.
case UNICHAR: {
uint32_t codepoint = val.template get<uint32_t>();
// Lead byte: its prefix (0xf0 / 0xe0 / 0xc0 / none) encodes how many bytes the
// sequence has (4 / 3 / 2 / 1), chosen by the magnitude of the code point.
// NOTE(review): no range check is visible here; values of 0x200000 and above
// overflow the three payload bits of the 4-byte lead, and surrogates
// (0xD800-0xDFFF) are encoded as-is -- confirm callers only pass valid code points.
if (codepoint >= 0x10000)
buf += (char)(0xf0 | (codepoint >> 18));
else if (codepoint >= 0x800)
buf += (char)(0xe0 | (codepoint >> 12));
else if (codepoint >= 0x80)
buf += (char)(0xc0 | (codepoint >> 6));
else
buf += (char)codepoint;
// Continuation bytes: these checks are deliberately independent `if`s (not an
// else-chain) so that a longer sequence emits every lower 6-bit group in order,
// each tagged with the 0x80 continuation prefix.
if (codepoint >= 0x10000)
buf += (char)(0x80 | ((codepoint >> 12) & 0x3f));
if (codepoint >= 0x800)
buf += (char)(0x80 | ((codepoint >> 6) & 0x3f));
if (codepoint >= 0x80)
buf += (char)(0x80 | ((codepoint >> 0) & 0x3f));
break;
}

case INTEGER: {
size_t width = Bits;
if (base != 10) {
Expand Down
47 changes: 45 additions & 2 deletions kernel/fmt.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ void Fmt::parse_rtlil(const RTLIL::Cell *cell) {
} else if (fmt.substr(i, 2) == "{{") {
part.str += '{';
++i;
} else if (fmt[i] == '}')
} else if (fmt[i] == '}') {
log_assert(false && "Unexpected '}' in format string");
else if (fmt[i] == '{') {
} else if (fmt[i] == '{') {
if (!part.str.empty()) {
part.type = FmtPart::LITERAL;
parts.push_back(part);
Expand Down Expand Up @@ -74,6 +74,12 @@ void Fmt::parse_rtlil(const RTLIL::Cell *cell) {
part.sig = args.extract(0, arg_size);
args.remove(0, arg_size);

if (fmt[i] == 'U') {
part.type = FmtPart::UNICHAR;
++i;
goto success;
}

if (fmt[i] == '>')
part.justify = FmtPart::RIGHT;
else if (fmt[i] == '<')
Expand Down Expand Up @@ -156,6 +162,7 @@ void Fmt::parse_rtlil(const RTLIL::Cell *cell) {
log_assert(false && "Unexpected end in format substitution");
}

success:
if (fmt[i] != '}')
log_assert(false && "Expected '}' after format substitution");

Expand Down Expand Up @@ -188,6 +195,11 @@ void Fmt::emit_rtlil(RTLIL::Cell *cell) const {
}
break;

case FmtPart::UNICHAR:
log_assert(part.sig.size() <= 32);
fmt += "{U}";
break;

case FmtPart::VLOG_TIME:
log_assert(part.sig.size() == 0);
YS_FALLTHROUGH
Expand Down Expand Up @@ -568,6 +580,16 @@ std::vector<VerilogFmtArg> Fmt::emit_verilog() const
break;
}

// UNICHAR has no direct Verilog counterpart here: it is emitted as a `%c`
// specifier whose argument is a slice of the signal, so output matches the
// UTF-8 rendering only for code points within ASCII.
case FmtPart::UNICHAR: {
VerilogFmtArg arg;
arg.type = VerilogFmtArg::INTEGER;
// presumably extract(offset, length), i.e. the low 7 bits -- TODO confirm
arg.sig = part.sig.extract(0, 7); // only ASCII
args.push_back(arg);

fmt.str += "%c";
break;
}

case FmtPart::VLOG_TIME: {
VerilogFmtArg arg;
arg.type = VerilogFmtArg::TIME;
Expand Down Expand Up @@ -630,6 +652,7 @@ void Fmt::emit_cxxrtl(std::ostream &os, std::string indent, std::function<void(c
case FmtPart::LITERAL: os << "LITERAL"; break;
case FmtPart::INTEGER: os << "INTEGER"; break;
case FmtPart::STRING: os << "STRING"; break;
case FmtPart::UNICHAR: os << "UNICHAR"; break;
case FmtPart::VLOG_TIME: os << "VLOG_TIME"; break;
}
os << ", ";
Expand Down Expand Up @@ -671,6 +694,26 @@ std::string Fmt::render() const
str += part.str;
break;

// UNICHAR: append the UTF-8 serialization of the code point in `part.sig` to `str`.
case FmtPart::UNICHAR: {
RTLIL::Const value = part.sig.as_const();
uint32_t codepoint = value.as_int();
// Lead byte: its prefix (0xf0 / 0xe0 / 0xc0 / none) encodes how many bytes the
// sequence has (4 / 3 / 2 / 1), chosen by the magnitude of the code point.
// NOTE(review): no range check is visible here; values of 0x200000 and above
// overflow the three payload bits of the 4-byte lead, and surrogates
// (0xD800-0xDFFF) are encoded as-is -- confirm inputs are valid code points.
if (codepoint >= 0x10000)
str += (char)(0xf0 | (codepoint >> 18));
else if (codepoint >= 0x800)
str += (char)(0xe0 | (codepoint >> 12));
else if (codepoint >= 0x80)
str += (char)(0xc0 | (codepoint >> 6));
else
str += (char)codepoint;
// Continuation bytes: these checks are deliberately independent `if`s (not an
// else-chain) so that a longer sequence emits every lower 6-bit group in order,
// each tagged with the 0x80 continuation prefix.
if (codepoint >= 0x10000)
str += (char)(0x80 | ((codepoint >> 12) & 0x3f));
if (codepoint >= 0x800)
str += (char)(0x80 | ((codepoint >> 6) & 0x3f));
if (codepoint >= 0x80)
str += (char)(0x80 | ((codepoint >> 0) & 0x3f));
break;
}

case FmtPart::INTEGER:
case FmtPart::STRING:
case FmtPart::VLOG_TIME: {
Expand Down
5 changes: 3 additions & 2 deletions kernel/fmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,14 @@ struct FmtPart {
LITERAL = 0,
INTEGER = 1,
STRING = 2,
VLOG_TIME = 3,
UNICHAR = 3,
VLOG_TIME = 4,
} type;

// LITERAL type
std::string str;

// INTEGER/STRING types
// INTEGER/STRING/UNICHAR types
RTLIL::SigSpec sig;

// INTEGER/STRING/VLOG_TIME types
Expand Down

0 comments on commit f722ddf

Please sign in to comment.