From ac882d9fa20a188e18bb93f3a28aee9317315b15 Mon Sep 17 00:00:00 2001 From: poire-z Date: Thu, 4 Jun 2020 16:45:15 +0200 Subject: [PATCH] CSS: content: open-quote support via TextLangMan Get the right quote chars for each language, and ensure nested quote levels (per lang_cfg). --- crengine/include/lvstsheet.h | 6 +- crengine/include/textlang.h | 13 ++ crengine/src/lvrend.cpp | 9 ++ crengine/src/lvstsheet.cpp | 145 +++++++++++++++++---- crengine/src/lvtinydom.cpp | 54 ++++++-- crengine/src/textlang.cpp | 246 ++++++++++++++++++++++++++++++++++- 6 files changed, 430 insertions(+), 43 deletions(-) diff --git a/crengine/include/lvstsheet.h b/crengine/include/lvstsheet.h index e5cba4725..53bbdf4e6 100644 --- a/crengine/include/lvstsheet.h +++ b/crengine/include/lvstsheet.h @@ -46,6 +46,7 @@ #include "cssdef.h" #include "lvstyles.h" +#include "textlang.h" class lxmlDocBase; class ldomNode; @@ -330,7 +331,10 @@ class LVStyleSheet { /// parse color value like #334455, #345 or red bool parse_color_value( const char * & str, css_length_t & value ); -/// get computed value for a node from its parsed CSS "content:" value +/// update (if needed) a style->content (parsed from the CSS declaration) before +/// applying it to a node's style +void update_style_content_property( css_style_rec_t * style, ldomNode * node ); +/// get the computed final text value for a node from its style->content lString16 get_applied_content_property( ldomNode * node ); /// extract @import filename from beginning of CSS diff --git a/crengine/include/textlang.h b/crengine/include/textlang.h index 8644ded69..fd568e2ad 100644 --- a/crengine/include/textlang.h +++ b/crengine/include/textlang.h @@ -80,6 +80,8 @@ class TextLangMan static HyphMethod * getMainLangHyphMethod(); // For HyphMan::hyphenate() + static void resetCounters(); + // For frontend info about TextLangMan status and seen langs static LVPtrVector * getLangCfgList() { return &_lang_cfg_list; @@ -99,6 +101,12 @@ class TextLangCfg lString16 
_lang_tag; HyphMethod * _hyph_method; + lString16 _open_quote1; + lString16 _close_quote1; + lString16 _open_quote2; + lString16 _close_quote2; + int _quote_nesting_level; + #if USE_HARFBUZZ==1 hb_language_t _hb_language; #endif @@ -110,6 +118,8 @@ class TextLangCfg bool _duplicate_real_hyphen_on_next_line; + void resetCounters(); + public: lString16 getLangTag() const { return _lang_tag; } @@ -129,6 +139,9 @@ class TextLangCfg return _hyph_method; } + lString16 & getOpeningQuote( bool update_level=true ); + lString16 & getClosingQuote( bool update_level=true ); + #if USE_HARFBUZZ==1 hb_language_t getHBLanguage() const { return _hb_language; } #endif diff --git a/crengine/src/lvrend.cpp b/crengine/src/lvrend.cpp index a8392ccc3..cce1ca781 100755 --- a/crengine/src/lvrend.cpp +++ b/crengine/src/lvrend.cpp @@ -9054,6 +9054,15 @@ void setNodeStyle( ldomNode * enode, css_style_ref_t parent_style, LVFontRef par delete pstyle->pseudo_elem_after_style; pstyle->pseudo_elem_after_style = NULL; } + + if ( nodeElementId == el_pseudoElem ) { + // Pseudo element ->content may need some update if it contains + // any of the open-quote-like tokens, to account for the + // quoting nested levels. setNodeStyle() is actually the right + // place to do that, as we're visiting all the nodes recursively. + update_style_content_property(pstyle, enode); + } + pstyle->flags = 0; // cleanup, before setStyle() adds it to cache // set calculated style diff --git a/crengine/src/lvstsheet.cpp b/crengine/src/lvstsheet.cpp index 06179145c..0259a7b2f 100644 --- a/crengine/src/lvstsheet.cpp +++ b/crengine/src/lvstsheet.cpp @@ -899,6 +899,9 @@ bool parse_content_property( const char * & str, lString16 & parsed_content) // 'n' for 'no-close-quote' // 'u' for 'url()', that we don't support // 'z' for unsupported tokens, like gradient()... 
+ // '$' (at start) this content needs post processing before + // being applied to a node's style (needed with quotes, + // to get the correct char for the current nested level). // Note: this parsing might not be super robust with // convoluted declarations... parsed_content.clear(); @@ -906,6 +909,7 @@ bool parse_content_property( const char * & str, lString16 & parsed_content) // The presence of a single 'none' or 'normal' among multiple // values make the whole thing 'none'. bool has_none = false; + bool needs_processing_when_applying = false; while ( skip_spaces( str ) && *str!=';' && *str!='}' && *str!='!' ) { if ( substr_icompare("none", str) ) { has_none = true; @@ -918,18 +922,22 @@ bool parse_content_property( const char * & str, lString16 & parsed_content) } else if ( substr_icompare("open-quote", str) ) { parsed_content << L'Q'; + needs_processing_when_applying = true; continue; } else if ( substr_icompare("close-quote", str) ) { parsed_content << L'q'; + needs_processing_when_applying = true; continue; } else if ( substr_icompare("no-open-quote", str) ) { parsed_content << L'N'; + needs_processing_when_applying = true; continue; } else if ( substr_icompare("no-close-quote", str) ) { parsed_content << L'n'; + needs_processing_when_applying = true; continue; } else if ( substr_icompare("attr", str) ) { @@ -1052,6 +1060,9 @@ bool parse_content_property( const char * & str, lString16 & parsed_content) parsed_content.clear(); parsed_content << L'X'; } + else if ( needs_processing_when_applying ) { + parsed_content.insert(0, 1, L'$'); + } if (*str) // something (;, } or !important) follows return true; // Restore original position if we reach end of CSS string, @@ -1062,6 +1073,104 @@ bool parse_content_property( const char * & str, lString16 & parsed_content) return false; } +/// Update a style->content, post processed for its node: a '$'-prefixed content gets its quote tokens (Q/q/N/n) resolved via the node's TextLangCfg, and loses its '$' prefix +void update_style_content_property( css_style_rec_t * style, ldomNode * node ) { + // We don't want to update too much: 
styles are hashed and shared by + // multiple nodes. We don't resolve "attr()" here as attributes are + // stable (and "attr(id)" would make all style->content different + // and prevent styles from being shared, increasing the number + // of styles to cache). + // But we need to resolve quotes, according to their nesting level, + // and transform them into a literal string 's'. + + if ( style->content.empty() || style->content[0] != L'$' ) { + // No update needed + return; + } + + // We need to know if this node is visible: if not, quotes nested + // level should not be updated. We might want to still include + // the computed quote (with quote char for level 1) for it to be + // displayed by writeNodeEx() when displaying the HTML, even if + // the node is invisible. + bool visible = style->display != css_d_none; + if ( visible ) { + ldomNode * n = node->getParentNode(); + for ( ; n && !n->isRoot(); n = n->getParentNode() ) { + if ( n->getStyle()->display == css_d_none ) { + visible = false; + break; + } + } + } + + // We do not support specifying quote chars to be used via CSS "quotes": + // :root { quotes: '\201c' '\201d' '\2018' '\2019'; } + // We use the ones hardcoded for the node lang tag language (or default + // typography language) provided by TextLangCfg. + // HTML5 default CSS specifies them with: + // :root:lang(af), :not(:lang(af)) > :lang(af) { quotes: '\201c' '\201d' '\2018' '\2019' } + // This might (or not) imply that nested levels are reset when entering + // text with another language, so this new language first level quote is used. + // We can actually get that same behaviour by having each TextLangCfg manage + // its own nesting level (which won't be reset when en>fr>en, though). + // But all this is quite rare, so don't bother about it much. + TextLangCfg * lang_cfg = TextLangMan::getTextLangCfg( node ); + + // Note: some quote char like (U+201C / U+201D) seem to not be mirrored + // (when using HarfBuzz) when added to some RTL arabic text. 
But it + // appears that way with Firefox too! + // But if we use another char (U+00AB / U+00BB), it gets mirrored correctly. + // Might be that HarfBuzz first substitute it with arabic quotes (which + // happen to look inverted), and then mirror that? + + lString16 res; + lString16 parsed_content = style->content; + lString16 quote; + int i = 1; // skip initial '$' + int parsed_content_len = parsed_content.length(); + while ( i < parsed_content_len ) { + lChar16 ctype = parsed_content[i]; + if ( ctype == 's' ) { // literal string ('s', length, chars): copy as-is + lChar16 len = parsed_content[i+1]; // length is stored right after the type char + res.append(parsed_content, i, len+2); + i += len+2; + } + else if ( ctype == 'a' ) { // attribute value ('a', length, chars): copy as-is + lChar16 len = parsed_content[i+1]; // length is stored right after the type char + res.append(parsed_content, i, len+2); + i += len+2; + } + else if ( ctype == 'Q' ) { // open-quote + quote = lang_cfg->getOpeningQuote(visible); + res << L's' << quote.length() << quote; + i += 1; + } + else if ( ctype == 'q' ) { // close-quote + quote = lang_cfg->getClosingQuote(visible); + res << L's' << quote.length() << quote; + i += 1; + } + else if ( ctype == 'N' ) { // no-open-quote + // This should just increment nested quote level and output nothing. + lang_cfg->getOpeningQuote(visible); + i += 1; + } + else if ( ctype == 'n' ) { // no-close-quote + // This should just decrement nested quote level and output nothing. + lang_cfg->getClosingQuote(visible); + i += 1; + } + else { + // All other stuff are single char (u, z, X) or unsupported/bogus char. 
+ res.append(parsed_content, i, 1); + i += 1; + } + } + // Replace style->content with what we built + style->content = res; +} + /// Returns the computed value for a node from its parsed CSS "content:" value lString16 get_applied_content_property( ldomNode * node ) { lString16 res; @@ -1100,38 +1209,24 @@ lString16 get_applied_content_property( ldomNode * node ) { // res << 0x25FD; // WHITE MEDIUM SMALL SQUARE res << 0x2B26; // WHITE MEDIUM DIAMOND } + else if ( ctype == 'X' ) { // 'none' + res.clear(); // should be standalone, but let's be sure + break; + } + else if ( ctype == 'z' ) { // unsupported token + // Just ignore it, don't show anything + } else if ( ctype == 'Q' ) { // open-quote - // Add default quoting opening char - // We do not support showing a different char for multiple nested , - // and neither the way to specify this with CSS, ie: - // q::before { content: open-quote; } - // :root { quotes: '\201c' '\201d' '\2018' '\2019'; } - // todo: have the right quote char for a language provided by lang_cfg - res << 0x201C; - // Note: this specific char seem to not be mirrored (when using HarfBuzz) when - // added to some RTL arabic text. But it appears that way with Firefox too! - // But if we use another char (0x00AB / 0x00BB), it gets mirrored correctly. - // Might be that HarfBuzz first substitute it with arabic quotes (which happen - // to look inverted), and then mirror that? 
+ // Shouldn't happen: replaced earlier by update_style_content_property() } else if ( ctype == 'q' ) { // close-quote - // Add default quoting closing char - res << 0x201D; + // Shouldn't happen: replaced earlier by update_style_content_property() } else if ( ctype == 'N' ) { // no-open-quote - // (This should just increment nested quote level if we supported that) - // Nothing to output + // Shouldn't happen: replaced earlier by update_style_content_property() } else if ( ctype == 'n' ) { // no-close-quote - // (This should just decrement nested quote level if we supported that) - // Nothing to output - } - else if ( ctype == 'X' ) { // 'none' - res.clear(); // should be standalone, but let's be sure - break; - } - else if ( ctype == 'z' ) { // unsupported token - // Just ignore it, don't show anything + // Shouldn't happen: replaced earlier by update_style_content_property() } else { // unexpected break; diff --git a/crengine/src/lvtinydom.cpp b/crengine/src/lvtinydom.cpp index f79c4343f..fe17aac44 100644 --- a/crengine/src/lvtinydom.cpp +++ b/crengine/src/lvtinydom.cpp @@ -4510,6 +4510,9 @@ bool ldomDocument::render( LVRendPageList * pages, LVDocViewCallback * callback, // create elements, but may be prevented from doing so by an existing cache file _boxingWishedButPreventedByCache = false; + // Reset counters (quotes nesting levels...) + TextLangMan::resetCounters(); + CRLog::trace("Save stylesheet..."); _stylesheet.push(); CRLog::trace("Init node styles..."); @@ -5018,16 +5021,24 @@ void ldomElementWriter::onBodyEnter() for ( int i=0; igetChildNode(i); if ( child->getNodeId() == el_pseudoElem ) { - // ->initNodeStyle() has been done when the element was created; - // as pseudo elements have no children, let's ->initNodeRendMethod() - // now (as done in onBodyExit()). - child->initNodeRendMethod(); - // ldomNode::ensurePseudoElement() will always have inserted - // "Before" first, and "After" second. 
But real children might - // soon be added, and we'll have to move "After" last when done. - // Which will be done in onBodyExit(). - if ( child->hasAttribute(attr_After) ) + if ( child->hasAttribute(attr_Before) ) { + // The "Before" pseudo element (not part of the XML) + // needs to have its style applied. As it has no + // children, we can also init its rend method. + child->initNodeStyle(); + child->initNodeRendMethod(); + } + else if ( child->hasAttribute(attr_After) ) { + // For the "After" pseudo element, we need to wait + // for all real children to be added, to move it + // to its right position (last), to init its style + // (because of "content:close-quote", whose nested + // level needs to have seen all previous nodes to + // be accurate) and its rendering method. + // We'll do that in onBodyExit() when called for + // this node. _pseudoElementAfterChildIndex = i; + } } } } @@ -5098,9 +5109,18 @@ void ldomNode::ensurePseudoElement( bool is_before ) { lUInt16 attribute_id = is_before ? attr_Before : attr_After; pseudo->setAttributeValue(LXML_NS_NONE, attribute_id, L""); // We are called by lvrend.cpp setNodeStyle(), after the parent - // style and font have been fully set up. - // We can set this pseudo element style as it can now properly inherit. - pseudo->initNodeStyle(); + // style and font have been fully set up. We could set this pseudo + // element style with pseudo->initNodeStyle(), as it can inherit + // properly, but we should not: + // - when re-rendering, initNodeStyleRecursive()/updateStyleDataRecursive() + // will iterate through this node we just added as a child, and do it. + // - when XML loading, we could do it for the "Before" pseudo element, + // but for the "After" one, we need to wait for all real children to be + // added and have their style applied - just because they can change + // open-quote/close-quote nesting levels - to be sure we get the + // proper nesting level quote char for the After node. 
+ // So, for the XML loading phase, we do that in onBodyEnter() and + // onBodyExit() when called on the parent node. } } @@ -6726,10 +6746,16 @@ void ldomElementWriter::onBodyExit() if ( _pseudoElementAfterChildIndex >= 0 ) { if ( _pseudoElementAfterChildIndex != _element->getChildCount()-1 ) { // Not the last child: move it there - // printf("moving After from %d to %d\n", _pseudoElementAfterChildIndex, _element->getChildCount()-1); - // moveItemsTo() just works to remove it, and re-add it (so, adding it at the end) + // (moveItemsTo() works just fine when the source node is also the + // target node: remove it, and re-add it, so, adding it at the end) _element->moveItemsTo( _element, _pseudoElementAfterChildIndex, _pseudoElementAfterChildIndex); } + // Now that all the real children of this node have had their + // style set, we can init the style of the "After" pseudo + // element, and its rend method as it has no children. + ldomNode * child = _element->getChildNode(_element->getChildCount()-1); + child->initNodeStyle(); + child->initNodeRendMethod(); } // if ( _element->getStyle().isNull() ) { // lString16 path; diff --git a/crengine/src/textlang.cpp b/crengine/src/textlang.cpp index e3e17e12a..c5bc9edc6 100644 --- a/crengine/src/textlang.cpp +++ b/crengine/src/textlang.cpp @@ -232,9 +232,209 @@ HyphMethod * TextLangMan::getMainLangHyphMethod() { return getTextLangCfg()->getHyphMethod(); } +void TextLangMan::resetCounters() { + for ( int i=0; i<_lang_cfg_list.length(); i++ ) { + _lang_cfg_list[i]->resetCounters(); + } +} // TextLangCfg object: per language holder of language specificities +// For CSS "content: open-quote / close-quote" +typedef struct quotes_spec { + const char * lang_tag; + const lChar16 * open_quote_level_1; + const lChar16 * close_quote_level_1; + const lChar16 * open_quote_level_2; + const lChar16 * close_quote_level_2; +} quotes_spec; + +// List built 20200601 from https://html.spec.whatwg.org/multipage/rendering.html#quotes +// 2nd part 
of lang_tag lowercased for easier comparison, and if multiple +// lang_tag with the same starting chars, put the longest first. +// Small issue: 3-letters lang tag not specified here might match +// a 2-letter lang tag specified here ("ito" will get those from "it"). +static quotes_spec _quotes_spec_table[] = { + { "af", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "agq", L"\x201e", L"\x201d", L"\x201a", L"\x2019" }, /* „ ” ‚ ’ */ + { "ak", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "am", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + { "ar", L"\x201d", L"\x201c", L"\x2019", L"\x2018" }, /* ” “ ’ ‘ */ + { "asa", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ast", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "az-cyrl", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + { "az", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "bas", L"\x00ab", L"\x00bb", L"\x201e", L"\x201c" }, /* « » „ “ */ + { "bem", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "bez", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "be", L"\x00ab", L"\x00bb", L"\x201e", L"\x201c" }, /* « » „ “ */ + { "bg", L"\x201e", L"\x201c", L"\x201e", L"\x201c" }, /* „ “ „ “ */ + { "bm", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "bn", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "brx", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "br", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "bs-cyrl", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "bs", L"\x201e", L"\x201d", L"\x2018", L"\x2019" }, /* „ ” ‘ ’ */ + { "ca", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "cgg", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "chr", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "cs", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, 
/* „ “ ‚ ‘ */ + { "cy", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "dav", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "da", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "de", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "dje", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "dsb", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "dua", L"\x00ab", L"\x00bb", L"\x2018", L"\x2019" }, /* « » ‘ ’ */ + { "dyo", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "dz", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ebu", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ee", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "el", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "en", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "es", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "et", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "eu", L"\x201c", L"\x201d", L"\x201c", L"\x201d" }, /* “ ” “ ” */ + { "ewo", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "fa", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + { "ff", L"\x201e", L"\x201d", L"\x201a", L"\x2019" }, /* „ ” ‚ ’ */ + { "fil", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "fi", L"\x201d", L"\x201d", L"\x2019", L"\x2019" }, /* ” ” ’ ’ */ + { "fo", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "fr-ch", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + // { "fr", L"\x00ab", L"\x00bb", L"\x00ab", L"\x00bb" }, /* « » « » */ /* Same pair for both level, bit sad... 
*/ + { "fr", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ /* Better to have "fr" just as "it" */ + { "ga", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "gd", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "gl", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "gsw", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + { "guz", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "gu", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ha", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "he", L"\x201d", L"\x201d", L"\x2019", L"\x2019" }, /* ” ” ’ ’ */ + { "hi", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "hr", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "hsb", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "hu", L"\x201e", L"\x201d", L"\x00bb", L"\x00ab" }, /* „ ” » « */ + { "hy", L"\x00ab", L"\x00bb", L"\x00ab", L"\x00bb" }, /* « » « » */ + { "id", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ig", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "is", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "it", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "ja", L"\x300c", L"\x300d", L"\x300e", L"\x300f" }, /* 「 」 『 』 */ + { "jgo", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + { "jmc", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "kab", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "kam", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ka", L"\x201e", L"\x201c", L"\x00ab", L"\x00bb" }, /* „ “ « » */ + { "kde", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "kea", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "khq", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ki", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, 
/* “ ” ‘ ’ */ + { "kkj", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + { "kk", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "kln", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "km", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "kn", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ko", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ksb", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ksf", L"\x00ab", L"\x00bb", L"\x2018", L"\x2019" }, /* « » ‘ ’ */ + { "ky", L"\x00ab", L"\x00bb", L"\x201e", L"\x201c" }, /* « » „ “ */ + { "lag", L"\x201d", L"\x201d", L"\x2019", L"\x2019" }, /* ” ” ’ ’ */ + { "lb", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "lg", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ln", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "lo", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "lrc", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "lt", L"\x201e", L"\x201c", L"\x201e", L"\x201c" }, /* „ “ „ “ */ + { "luo", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "luy", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "lu", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "lv", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mas", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mer", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mfe", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mgo", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mg", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "mk", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "ml", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mn", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mr", 
L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ms", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mt", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mua", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "my", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mzn", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + { "naq", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "nb", L"\x00ab", L"\x00bb", L"\x2018", L"\x2019" }, /* « » ‘ ’ */ + { "nd", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ne", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "nl", L"\x2018", L"\x2019", L"\x201c", L"\x201d" }, /* ‘ ’ “ ” */ + { "nmg", L"\x201e", L"\x201d", L"\x00ab", L"\x00bb" }, /* „ ” « » */ + { "nnh", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "nn", L"\x00ab", L"\x00bb", L"\x2018", L"\x2019" }, /* « » ‘ ’ */ + { "nus", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "nyn", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "pa", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "pl", L"\x201e", L"\x201d", L"\x00ab", L"\x00bb" }, /* „ ” « » */ + { "pt-pt", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "pt", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "rn", L"\x201d", L"\x201d", L"\x2019", L"\x2019" }, /* ” ” ’ ’ */ + { "rof", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ro", L"\x201e", L"\x201d", L"\x00ab", L"\x00bb" }, /* „ ” « » */ + { "ru", L"\x00ab", L"\x00bb", L"\x201e", L"\x201c" }, /* « » „ “ */ + { "rwk", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "rw", L"\x00ab", L"\x00bb", L"\x2018", L"\x2019" }, /* « » ‘ ’ */ + { "sah", L"\x00ab", L"\x00bb", L"\x201e", L"\x201c" }, /* « » „ “ */ + { "saq", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "sbp", L"\x201c", L"\x201d", 
L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "seh", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ses", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "sg", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "shi-latn", L"\x00ab", L"\x00bb", L"\x201e", L"\x201d" }, /* « » „ ” */ + { "shi", L"\x00ab", L"\x00bb", L"\x201e", L"\x201d" }, /* « » „ ” */ + { "si", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "sk", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "sl", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "sn", L"\x201d", L"\x201d", L"\x2019", L"\x2019" }, /* ” ” ’ ’ */ + { "so", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "sq", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "sr-latn", L"\x201e", L"\x201c", L"\x2018", L"\x2018" }, /* „ “ ‘ ‘ */ + { "sr", L"\x201e", L"\x201c", L"\x2018", L"\x2018" }, /* „ “ ‘ ‘ */ + { "sv", L"\x201d", L"\x201d", L"\x2019", L"\x2019" }, /* ” ” ’ ’ */ + { "sw", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ta", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "teo", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "te", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "th", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ti-er", L"\x2018", L"\x2019", L"\x201c", L"\x201d" }, /* ‘ ’ “ ” */ + { "tk", L"\x201c", L"\x201d", L"\x201c", L"\x201d" }, /* “ ” “ ” */ + { "to", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "tr", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "twq", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "tzm", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "uk", L"\x00ab", L"\x00bb", L"\x201e", L"\x201c" }, /* « » „ “ */ + { "ur", L"\x201d", L"\x201c", L"\x2019", L"\x2018" }, /* ” “ ’ ‘ */ + { "uz-cyrl", L"\x201c", L"\x201d", L"\x2018", 
L"\x2019" }, /* “ ” ‘ ’ */ + { "uz", L"\x201c", L"\x201d", L"\x2019", L"\x2018" }, /* “ ” ’ ‘ */ + { "vai-latn", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "vai", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "vi", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "vun", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "xog", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "yav", L"\x00ab", L"\x00bb", L"\x00ab", L"\x00bb" }, /* « » « » */ + { "yo", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "yue-hans", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "yue", L"\x300c", L"\x300d", L"\x300e", L"\x300f" }, /* 「 」 『 』 */ + { "zgh", L"\x00ab", L"\x00bb", L"\x201e", L"\x201d" }, /* « » „ ” */ + { "zh-hant", L"\x300c", L"\x300d", L"\x300e", L"\x300f" }, /* 「 」 『 』 */ + { "zh", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "zu", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { NULL, NULL, NULL, NULL, NULL } +}; +// Default to quotes for English +static quotes_spec _quotes_spec_default = { "", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }; + #if USE_LIBUNIBREAK==1 lChar16 lb_char_sub_func_polish(const lChar16 * text, int pos, int next_usable) { // https://github.com/koreader/koreader/issues/5645#issuecomment-559193057 @@ -297,9 +497,6 @@ lChar16 lb_char_sub_func_czech_slovak(const lChar16 * text, int pos, int next_us } #endif -TextLangCfg::~TextLangCfg() { -} - // Instantiate a new TextLangCfg with properties adequate to the provided lang_tag TextLangCfg::TextLangCfg( lString16 lang_tag ) { if ( TextLangMan::_no_hyph_method == NULL ) { @@ -464,4 +661,47 @@ TextLangCfg::TextLangCfg( lString16 lang_tag ) { _duplicate_real_hyphen_on_next_line = true; } #endif + + // Language default opening and closing quotes, for CSS + // "q::before { content: open-quote }" and + // "q::after { content: close-quote }" + quotes_spec * quotes = 
&_quotes_spec_default; + for (int i=0; _quotes_spec_table[i].lang_tag!=NULL; i++) { + if ( lang_tag.startsWith( _quotes_spec_table[i].lang_tag ) ) { + quotes = &_quotes_spec_table[i]; + break; + } + } + // Avoid a wrap after/before an opening/close quote. + const lChar16 * quote_joiner = L"\x2060"; + // (Zero width, equivalent to deprecated ZERO WIDTH NO-BREAK SPACE) + // We might want with some languages to use a non-breaking thin space instead. + + _open_quote1 << quotes->open_quote_level_1 << quote_joiner; + _close_quote1 << quote_joiner << quotes->close_quote_level_1; + _open_quote2 << quotes->open_quote_level_2 << quote_joiner; + _close_quote2 << quote_joiner << quotes->close_quote_level_2; + + resetCounters(); +} + +TextLangCfg::~TextLangCfg() { +} + +void TextLangCfg::resetCounters() { + _quote_nesting_level = 0; +} + +lString16 & TextLangCfg::getOpeningQuote( bool update_level ) { + if ( !update_level ) + return _open_quote1; + _quote_nesting_level++; // odd levels use the 1st pair, even levels the 2nd pair + return (_quote_nesting_level % 2) ? _open_quote1 : _open_quote2; +} + +lString16 & TextLangCfg::getClosingQuote( bool update_level ) { + if ( !update_level ) + return _close_quote1; + if ( _quote_nesting_level > 0 ) _quote_nesting_level--; // never go below 0: an unbalanced close-quote must not shift the parity of all following quotes + return ((_quote_nesting_level+1) % 2) ? _close_quote1 : _close_quote2; }