Skip to content

Commit

Permalink
Part of PR. Style
Browse files Browse the repository at this point in the history
Часть большого ПР REVIEW:7264088
commit_hash:0f5b03fbbed0ac30f734943309e3ef5cd4d7a30e
  • Loading branch information
nae202 committed Nov 20, 2024
1 parent bb55cdf commit 2a0addd
Show file tree
Hide file tree
Showing 47 changed files with 689 additions and 363 deletions.
6 changes: 4 additions & 2 deletions util/charset/unicode_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,15 +101,17 @@ namespace NUnicodeTable {
}

inline TValueRef Get(size_t key, TValueRef value) const {
if (key >= Size())
if (key >= Size()) {
return value;
}

return GetImpl(key);
}

inline TValueRef Get(size_t key, size_t defaultKey) const {
if (key >= Size())
if (key >= Size()) {
return Get(defaultKey);
}

return GetImpl(key);
}
Expand Down
3 changes: 2 additions & 1 deletion util/charset/unidata.h
Original file line number Diff line number Diff line change
Expand Up @@ -401,8 +401,9 @@ inline bool IsPrint(wchar32 ch) {
}

inline bool IsRomanDigit(wchar32 ch) {
if (NUnicode::CharHasType(ch, SHIFT(Nl_LETTER)) && 0x2160 <= ch && ch <= 0x2188)
if (NUnicode::CharHasType(ch, SHIFT(Nl_LETTER)) && 0x2160 <= ch && ch <= 0x2188) {
return true;
}
if (ch < 127) {
switch (static_cast<char>(::ToLower(ch))) {
case 'i':
Expand Down
3 changes: 2 additions & 1 deletion util/charset/utf8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,9 @@ namespace {
}
cNew = ConvertChar(conversion, c);

if (cNew != c)
if (cNew != c) {
break;
}
p += cLen;
}
if (p == end) {
Expand Down
57 changes: 35 additions & 22 deletions util/charset/utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,19 @@ inline size_t UTF8RuneLen(const unsigned char lead_byte) {
}

inline size_t UTF8RuneLenByUCS(wchar32 rune) {
if (rune < 0x80)
if (rune < 0x80) {
return 1U;
else if (rune < 0x800)
} else if (rune < 0x800) {
return 2U;
else if (rune < 0x10000)
} else if (rune < 0x10000) {
return 3U;
else if (rune < 0x200000)
} else if (rune < 0x200000) {
return 4U;
else if (rune < 0x4000000)
} else if (rune < 0x4000000) {
return 5U;
else
} else {
return 6U;
}
}

inline void PutUTF8LeadBits(wchar32& rune, unsigned char c, size_t len) {
Expand Down Expand Up @@ -193,37 +194,45 @@ inline RECODE_RESULT SafeReadUTF8Char(wchar32& rune, size_t& rune_len, const uns
wchar32 _rune;

size_t _len = UTF8RuneLen(*s);
if (s + _len > end)
if (s + _len > end) {
return RECODE_EOINPUT; // [EOINPUT]
if (_len == 0)
}
if (_len == 0) {
return RECODE_BROKENSYMBOL; // [BROKENSYMBOL] in first byte
_rune = *s++; // [00000000 0XXXXXXX]
}
_rune = *s++; // [00000000 0XXXXXXX]

if (_len > 1) {
_rune &= UTF8LeadByteMask(_len);
unsigned char ch = *s++;
if (!IsUTF8ContinuationByte(ch))
if (!IsUTF8ContinuationByte(ch)) {
return RECODE_BROKENSYMBOL; // [BROKENSYMBOL] in second byte
PutUTF8SixBits(_rune, ch); // [00000XXX XXYYYYYY]
}
PutUTF8SixBits(_rune, ch); // [00000XXX XXYYYYYY]
if (_len > 2) {
ch = *s++;
if (!IsUTF8ContinuationByte(ch))
if (!IsUTF8ContinuationByte(ch)) {
return RECODE_BROKENSYMBOL; // [BROKENSYMBOL] in third byte
PutUTF8SixBits(_rune, ch); // [XXXXYYYY YYZZZZZZ]
}
PutUTF8SixBits(_rune, ch); // [XXXXYYYY YYZZZZZZ]
if (_len > 3) {
ch = *s;
if (!IsUTF8ContinuationByte(ch))
if (!IsUTF8ContinuationByte(ch)) {
return RECODE_BROKENSYMBOL; // [BROKENSYMBOL] in fourth byte
PutUTF8SixBits(_rune, ch); // [XXXYY YYYYZZZZ ZZQQQQQQ]
if (!IsValidUTF8Rune<4, strictMode>(_rune))
}
PutUTF8SixBits(_rune, ch); // [XXXYY YYYYZZZZ ZZQQQQQQ]
if (!IsValidUTF8Rune<4, strictMode>(_rune)) {
return RECODE_BROKENSYMBOL;
}
} else {
if (!IsValidUTF8Rune<3, strictMode>(_rune))
if (!IsValidUTF8Rune<3, strictMode>(_rune)) {
return RECODE_BROKENSYMBOL;
}
}
} else {
if (!IsValidUTF8Rune<2, strictMode>(_rune))
if (!IsValidUTF8Rune<2, strictMode>(_rune)) {
return RECODE_BROKENSYMBOL;
}
}
}
rune_len = _len;
Expand Down Expand Up @@ -315,32 +324,36 @@ Y_FORCE_INLINE RECODE_RESULT ReadUTF8CharAndAdvance(wchar32& rune, const unsigne
inline RECODE_RESULT SafeWriteUTF8Char(wchar32 rune, size_t& rune_len, unsigned char* s, size_t tail) {
rune_len = 0;
if (rune < 0x80) {
if (tail <= 0)
if (tail <= 0) {
return RECODE_EOOUTPUT;
}
*s = static_cast<unsigned char>(rune);
rune_len = 1;
return RECODE_OK;
}
if (rune < 0x800) {
if (tail <= 1)
if (tail <= 1) {
return RECODE_EOOUTPUT;
}
*s++ = static_cast<unsigned char>(0xC0 | (rune >> 6));
*s = static_cast<unsigned char>(0x80 | (rune & 0x3F));
rune_len = 2;
return RECODE_OK;
}
if (rune < 0x10000) {
if (tail <= 2)
if (tail <= 2) {
return RECODE_EOOUTPUT;
}
*s++ = static_cast<unsigned char>(0xE0 | (rune >> 12));
*s++ = static_cast<unsigned char>(0x80 | ((rune >> 6) & 0x3F));
*s = static_cast<unsigned char>(0x80 | (rune & 0x3F));
rune_len = 3;
return RECODE_OK;
}
/*if (rune < 0x200000)*/ {
if (tail <= 3)
if (tail <= 3) {
return RECODE_EOOUTPUT;
}
*s++ = static_cast<unsigned char>(0xF0 | ((rune >> 18) & 0x07));
*s++ = static_cast<unsigned char>(0x80 | ((rune >> 12) & 0x3F));
*s++ = static_cast<unsigned char>(0x80 | ((rune >> 6) & 0x3F));
Expand Down
14 changes: 9 additions & 5 deletions util/charset/wide.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@ namespace {
case '\"':
return Y_ARRAY_SIZE(QUOT);
default:
if (insertBr && (c == '\r' || c == '\n'))
if (insertBr && (c == '\r' || c == '\n')) {
return Y_ARRAY_SIZE(BR);
else
} else {
return 1;
}
}
}
} // namespace
Expand Down Expand Up @@ -579,11 +580,13 @@ void EscapeHtmlChars(TUtf16String& str) {

const TUtf16String& cs = str;

for (size_t i = 0; i < cs.size(); ++i)
for (size_t i = 0; i < cs.size(); ++i) {
escapedLen += EscapedLen<insertBr>(cs[i]);
}

if (escapedLen == cs.size())
if (escapedLen == cs.size()) {
return;
}

TUtf16String res;
res.reserve(escapedLen);
Expand All @@ -609,8 +612,9 @@ void EscapeHtmlChars(TUtf16String& str) {
if (insertBr && (cs[i] == '\r' || cs[i] == '\n')) {
ent = &br;
break;
} else
} else {
continue;
}
}

res.append(cs.begin() + start, cs.begin() + i);
Expand Down
18 changes: 12 additions & 6 deletions util/charset/wide.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@ inline const wchar32* SkipSymbol(const wchar32* begin, const wchar32* end) noexc
inline wchar32 ReadSymbol(const wchar16* begin, const wchar16* end) noexcept {
Y_ASSERT(begin < end);
if (IsW16SurrogateLead(*begin)) {
if (begin + 1 < end && IsW16SurrogateTail(*(begin + 1)))
if (begin + 1 < end && IsW16SurrogateTail(*(begin + 1))) {
return ::NDetail::ReadSurrogatePair(begin);
}

return BROKEN_RUNE;
} else if (IsW16SurrogateTail(*begin)) {
Expand Down Expand Up @@ -210,8 +211,9 @@ inline bool WriteSymbol(wchar32 s, wchar16*& dest, const wchar16* destEnd) noexc
return true;
}

if (dest + 2 > destEnd)
if (dest + 2 > destEnd) {
return false;
}

::NDetail::WriteSurrogatePair(s, dest);
} else {
Expand Down Expand Up @@ -362,8 +364,9 @@ inline TUtf16String UTF8ToWide(const char* text, size_t len) {
TUtf16String w = TUtf16String::Uninitialized(len);
size_t written;
size_t pos = UTF8ToWideImpl<robust>(text, len, w.begin(), written);
if (pos != len)
if (pos != len) {
ythrow yexception() << "failed to decode UTF-8 string at pos " << pos << ::NDetail::InStringMsg(text, len);
}
Y_ASSERT(w.size() >= written);
w.remove(written);
return w;
Expand Down Expand Up @@ -707,15 +710,17 @@ inline TUtf32String ASCIIToUTF32(const TStringBuf s) {

//! returns @c true if string is non-empty and contains whitespace characters only (an empty string yields @c false)
inline bool IsSpace(const wchar16* s, size_t n) {
if (n == 0)
if (n == 0) {
return false;
}

Y_ASSERT(s);

const wchar16* const e = s + n;
for (const wchar16* p = s; p != e; ++p) {
if (!IsWhitespace(*p))
if (!IsWhitespace(*p)) {
return false;
}
}
return true;
}
Expand Down Expand Up @@ -879,8 +884,9 @@ inline bool IsValidUTF16(const wchar16* b, const wchar16* e) {
Y_ENSURE(b <= e, TStringBuf("invalid iterators"));
while (b < e) {
wchar32 symbol = ReadSymbolAndAdvance(b, e);
if (symbol == BROKEN_RUNE)
if (symbol == BROKEN_RUNE) {
return false;
}
}
return true;
}
Expand Down
Loading

0 comments on commit 2a0addd

Please sign in to comment.