Skip to content

Commit

Permalink
Tweak cmark_utf8proc_is_CJK
Browse files Browse the repository at this point in the history
  • Loading branch information
tats-u authored and jgm committed Aug 18, 2024
1 parent 389fda2 commit 4104bc1
Showing 1 changed file with 21 additions and 13 deletions.
34 changes: 21 additions & 13 deletions src/utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -436,41 +436,49 @@ int cmark_utf8proc_is_punctuation_or_symbol(int32_t uc) {
// CJK Compatibility Ideographs: U+F900 - U+FAFF
// CJK Compatibility Ideographs Supplement: U+2F800 - U+2FA1F
int cmark_utf8proc_is_CJK(int32_t uc) {
  // Returns 1 when `uc` lies in one of the code point ranges treated as CJK
  // below, and 0 otherwise.
  //
  // Runs of adjacent Unicode blocks are merged into a single range test.
  // Each merged range lists the blocks it spans so the table can be checked
  // against the Unicode block list.

  // Nothing below CJK Radicals Supplement (U+2E80) qualifies, so ASCII/Latin
  // input (and any negative value) is rejected with one comparison.
  if (uc < 0x2e80) {
    return 0;
  }

  return (
      // U+2E80..U+4DBF:
      //   CJK Radicals Supplement, Kangxi Radicals,
      //   U+2FE0..U+2FEF (unused, but the blocks on both sides are CJK),
      //   Ideographic Description Characters, CJK Symbols and Punctuation,
      //   Hiragana, Katakana, Bopomofo, Hangul Compatibility Jamo, Kanbun,
      //   Bopomofo Extended, CJK Strokes, Katakana Phonetic Extensions,
      //   Enclosed CJK Letters and Months, CJK Compatibility,
      //   CJK Unified Ideographs Extension A
      (uc >= 0x2e80 && uc <= 0x4dbf)
      // U+4DC0..U+4DFF (Yijing Hexagram Symbols) is not matched.
      // U+4E00..U+A4CF:
      //   CJK Unified Ideographs, Yi Syllables, Yi Radicals
      || (uc >= 0x4e00 && uc <= 0xa4cf)
      // CJK Compatibility Ideographs
      || (uc >= 0xf900 && uc <= 0xfaff)
      // Vertical Forms
      || (uc >= 0xfe10 && uc <= 0xfe1f)
      // CJK Compatibility Forms, Small Form Variants
      || (uc >= 0xfe30 && uc <= 0xfe6f)
      // Halfwidth and Fullwidth Forms (upper bound kept at U+FFEE)
      || (uc >= 0xff00 && uc <= 0xffee)
      // Kana Supplement, Kana Extended-A, Small Kana Extension
      || (uc >= 0x1b000 && uc <= 0x1b16f)
      // U+20000..U+3FFFF (all of planes 2 and 3):
      //   CJK Unified Ideographs Extensions B-F,
      //   CJK Compatibility Ideographs Supplement,
      //   CJK Unified Ideographs Extensions G-H,
      //   plus the currently unused SIP/TIP regions, where further CJK
      //   characters will probably be allocated
      || (uc >= 0x20000 && uc <= 0x3ffff)
      // Ideographic Variation Sequences
      || (uc >= 0xe0100 && uc <= 0xe01ff));
}

0 comments on commit 4104bc1

Please sign in to comment.