Skip to content

Commit

Permalink
CSS: content: open-quote support via TextLangMan
Browse files Browse the repository at this point in the history
Get the right quote chars for each language,
and ensure nested quote levels (per lang_cfg).
  • Loading branch information
poire-z committed Jun 5, 2020
1 parent 81fd1be commit ac882d9
Show file tree
Hide file tree
Showing 6 changed files with 430 additions and 43 deletions.
6 changes: 5 additions & 1 deletion crengine/include/lvstsheet.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

#include "cssdef.h"
#include "lvstyles.h"
#include "textlang.h"

class lxmlDocBase;
class ldomNode;
Expand Down Expand Up @@ -330,7 +331,10 @@ class LVStyleSheet {
/// parse color value like #334455, #345 or red
bool parse_color_value( const char * & str, css_length_t & value );

/// get computed value for a node from its parsed CSS "content:" value
/// update (if needed) a style->content (parsed from the CSS declaration) before
/// applying to a node's style
void update_style_content_property( css_style_rec_t * style, ldomNode * node );
/// get the computed final text value for a node from its style->content
lString16 get_applied_content_property( ldomNode * node );

/// extract @import filename from beginning of CSS
Expand Down
13 changes: 13 additions & 0 deletions crengine/include/textlang.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ class TextLangMan

static HyphMethod * getMainLangHyphMethod(); // For HyphMan::hyphenate()

static void resetCounters();

// For frontend info about TextLangMan status and seen langs
static LVPtrVector<TextLangCfg> * getLangCfgList() {
return &_lang_cfg_list;
Expand All @@ -99,6 +101,12 @@ class TextLangCfg
lString16 _lang_tag;
HyphMethod * _hyph_method;

lString16 _open_quote1;
lString16 _close_quote1;
lString16 _open_quote2;
lString16 _close_quote2;
int _quote_nesting_level;

#if USE_HARFBUZZ==1
hb_language_t _hb_language;
#endif
Expand All @@ -110,6 +118,8 @@ class TextLangCfg

bool _duplicate_real_hyphen_on_next_line;

void resetCounters();

public:
lString16 getLangTag() const { return _lang_tag; }

Expand All @@ -129,6 +139,9 @@ class TextLangCfg
return _hyph_method;
}

lString16 & getOpeningQuote( bool update_level=true );
lString16 & getClosingQuote( bool update_level=true );

#if USE_HARFBUZZ==1
hb_language_t getHBLanguage() const { return _hb_language; }
#endif
Expand Down
9 changes: 9 additions & 0 deletions crengine/src/lvrend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9054,6 +9054,15 @@ void setNodeStyle( ldomNode * enode, css_style_ref_t parent_style, LVFontRef par
delete pstyle->pseudo_elem_after_style;
pstyle->pseudo_elem_after_style = NULL;
}

if ( nodeElementId == el_pseudoElem ) {
// Pseudo element ->content may need some update if it contains
// any of the open-quote-like tokens, to account for the
// quoting nested levels. setNodeStyle() is actually the good
// place to do that, as we're visiting all the nodes recursively.
update_style_content_property(pstyle, enode);
}

pstyle->flags = 0; // cleanup, before setStyle() adds it to cache

// set calculated style
Expand Down
145 changes: 120 additions & 25 deletions crengine/src/lvstsheet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -899,13 +899,17 @@ bool parse_content_property( const char * & str, lString16 & parsed_content)
// 'n' for 'no-close-quote'
// 'u' for 'url()', that we don't support
// 'z' for unsupported tokens, like gradient()...
// '$' (at start) this content needs post processing before
// being applied to a node's style (needed with quotes,
// to get the correct char for the current nested level).
// Note: this parsing might not be super robust with
// convoluted declarations...
parsed_content.clear();
const char * orig_pos = str;
// The presence of a single 'none' or 'normal' among multiple
// values make the whole thing 'none'.
bool has_none = false;
bool needs_processing_when_applying = false;
while ( skip_spaces( str ) && *str!=';' && *str!='}' && *str!='!' ) {
if ( substr_icompare("none", str) ) {
has_none = true;
Expand All @@ -918,18 +922,22 @@ bool parse_content_property( const char * & str, lString16 & parsed_content)
}
else if ( substr_icompare("open-quote", str) ) {
parsed_content << L'Q';
needs_processing_when_applying = true;
continue;
}
else if ( substr_icompare("close-quote", str) ) {
parsed_content << L'q';
needs_processing_when_applying = true;
continue;
}
else if ( substr_icompare("no-open-quote", str) ) {
parsed_content << L'N';
needs_processing_when_applying = true;
continue;
}
else if ( substr_icompare("no-close-quote", str) ) {
parsed_content << L'n';
needs_processing_when_applying = true;
continue;
}
else if ( substr_icompare("attr", str) ) {
Expand Down Expand Up @@ -1052,6 +1060,9 @@ bool parse_content_property( const char * & str, lString16 & parsed_content)
parsed_content.clear();
parsed_content << L'X';
}
else if ( needs_processing_when_applying ) {
parsed_content.insert(0, 1, L'$');
}
if (*str) // something (;, } or !important) follows
return true;
// Restore original position if we reach end of CSS string,
Expand All @@ -1062,6 +1073,104 @@ bool parse_content_property( const char * & str, lString16 & parsed_content)
return false;
}

/// Update a style->content, post-processed for its node.
///
/// Only content flagged by parse_content_property() with a leading '$'
/// (i.e. containing open-quote/close-quote/no-open-quote/no-close-quote
/// tokens) is touched: quote tokens are resolved through the node's
/// TextLangCfg and replaced by literal strings ('s' + length + text),
/// and the leading '$' is dropped. Any other content is left unchanged.
///
/// @param style  style record whose ->content is rewritten in place
/// @param node   node the style is being applied to (used to find the
///               language config, and to check for display:none ancestors)
void update_style_content_property( css_style_rec_t * style, ldomNode * node ) {
    // We don't want to update too much: styles are hashed and shared by
    // multiple nodes. We don't resolve "attr()" here as attributes are
    // stable (and "attr(id)" would make all style->content different
    // and prevent styles from being shared, increasing the number
    // of styles to cache).
    // But we need to resolve quotes, according to their nesting level,
    // and transform them into a literal string 's'.

    if ( style->content.empty() || style->content[0] != L'$' ) {
        // No update needed
        return;
    }

    // We need to know if this node is visible: if not, quotes nested
    // level should not be updated. We might want to still include
    // the computed quote (with quote char for level 1) for it to be
    // displayed by writeNodeEx() when displaying the HTML, even if
    // the node is invisible.
    bool visible = style->display != css_d_none;
    if ( visible ) {
        // Walk up the ancestors: any display:none one hides this node.
        // (The null check only protects against a detached node.)
        for ( ldomNode * n = node->getParentNode(); n && !n->isRoot(); n = n->getParentNode() ) {
            if ( n->getStyle()->display == css_d_none ) {
                visible = false;
                break;
            }
        }
    }

    // We do not support specifying quote chars to be used via CSS "quotes":
    //     :root { quotes: '\201c' '\201d' '\2018' '\2019'; }
    // We use the ones hardcoded for the node lang tag language (or default
    // typography language) provided by TextLangCfg.
    // HTML5 default CSS specifies them with:
    //     :root:lang(af), :not(:lang(af)) > :lang(af) { quotes: '\201c' '\201d' '\2018' '\2019' }
    // This might (or not) imply that nested levels are reset when entering
    // text with another language, so this new language first level quote is used.
    // We can actually get that same behaviour by having each TextLangCfg manage
    // its own nesting level (which won't be reset when en>fr>en, though).
    // But all this is quite rare, so don't bother about it much.
    TextLangCfg * lang_cfg = TextLangMan::getTextLangCfg( node );

    // Note: some quote char like (U+201C / U+201D) seem to not be mirrored
    // (when using HarfBuzz) when added to some RTL arabic text. But it
    // appears that way with Firefox too!
    // But if we use another char (U+00AB / U+00BB), it gets mirrored correctly.
    // Might be that HarfBuzz first substitute it with arabic quotes (which
    // happen to look inverted), and then mirror that?

    lString16 res;
    lString16 parsed_content = style->content;
    int i = 1; // skip initial '$'
    int parsed_content_len = parsed_content.length();
    while ( i < parsed_content_len ) {
        lChar16 ctype = parsed_content[i];
        if ( ctype == 's' || ctype == 'a' ) {
            // Literal string ('s') or attribute name ('a'): copy as-is.
            // Layout is: type char, length char, then 'length' chars.
            if ( i + 1 >= parsed_content_len ) {
                // Truncated token (no length char): nothing sane to copy
                break;
            }
            // The length is the char FOLLOWING the type char (reading it
            // at [i] would wrongly use the type char code as the length).
            lChar16 len = parsed_content[i+1];
            res.append(parsed_content, i, len+2);
            i += len+2;
        }
        else if ( ctype == 'Q' ) { // open-quote
            const lString16 & quote = lang_cfg->getOpeningQuote(visible);
            res << L's' << quote.length() << quote;
            i += 1;
        }
        else if ( ctype == 'q' ) { // close-quote
            const lString16 & quote = lang_cfg->getClosingQuote(visible);
            res << L's' << quote.length() << quote;
            i += 1;
        }
        else if ( ctype == 'N' ) { // no-open-quote
            // Outputs nothing: only called for its effect on the nested
            // quote level (presumably incrementing it, when visible).
            lang_cfg->getOpeningQuote(visible);
            i += 1;
        }
        else if ( ctype == 'n' ) { // no-close-quote
            // Outputs nothing: only called for its effect on the nested
            // quote level (presumably decrementing it, when visible).
            lang_cfg->getClosingQuote(visible);
            i += 1;
        }
        else {
            // All other stuff are single char (u, z, X) or unsupported/bogus char.
            res.append(parsed_content, i, 1);
            i += 1;
        }
    }
    // Replace style->content with what we built
    style->content = res;
}

/// Returns the computed value for a node from its parsed CSS "content:" value
lString16 get_applied_content_property( ldomNode * node ) {
lString16 res;
Expand Down Expand Up @@ -1100,38 +1209,24 @@ lString16 get_applied_content_property( ldomNode * node ) {
// res << 0x25FD; // WHITE MEDIUM SMALL SQUARE
res << 0x2B26; // WHITE MEDIUM DIAMOND
}
else if ( ctype == 'X' ) { // 'none'
res.clear(); // should be standalone, but let's be sure
break;
}
else if ( ctype == 'z' ) { // unsupported token
// Just ignore it, don't show anything
}
else if ( ctype == 'Q' ) { // open-quote
// Add default quoting opening char
// We do not support showing a different char for multiple nested <q>,
// and neither the way to specify this with CSS, ie:
// q::before { content: open-quote; }
// :root { quotes: '\201c' '\201d' '\2018' '\2019'; }
// todo: have the right quote char for a language provided by lang_cfg
res << 0x201C;
// Note: this specific char seem to not be mirrored (when using HarfBuzz) when
// added to some RTL arabic text. But it appears that way with Firefox too!
// But if we use another char (0x00AB / 0x00BB), it gets mirrored correctly.
// Might be that HarfBuzz first substitute it with arabic quotes (which happen
// to look inverted), and then mirror that?
// Shouldn't happen: replaced earlier by update_style_content_property()
}
else if ( ctype == 'q' ) { // close-quote
// Add default quoting closing char
res << 0x201D;
// Shouldn't happen: replaced earlier by update_style_content_property()
}
else if ( ctype == 'N' ) { // no-open-quote
// (This should just increment nested quote level if we supported that)
// Nothing to output
// Shouldn't happen: replaced earlier by update_style_content_property()
}
else if ( ctype == 'n' ) { // no-close-quote
// (This should just decrement nested quote level if we supported that)
// Nothing to output
}
else if ( ctype == 'X' ) { // 'none'
res.clear(); // should be standalone, but let's be sure
break;
}
else if ( ctype == 'z' ) { // unsupported token
// Just ignore it, don't show anything
// Shouldn't happen: replaced earlier by update_style_content_property()
}
else { // unexpected
break;
Expand Down
54 changes: 40 additions & 14 deletions crengine/src/lvtinydom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4510,6 +4510,9 @@ bool ldomDocument::render( LVRendPageList * pages, LVDocViewCallback * callback,
// create elements, but may be prevented from doing so by an existing cache file
_boxingWishedButPreventedByCache = false;

// Reset counters (quotes nesting levels...)
TextLangMan::resetCounters();

CRLog::trace("Save stylesheet...");
_stylesheet.push();
CRLog::trace("Init node styles...");
Expand Down Expand Up @@ -5018,16 +5021,24 @@ void ldomElementWriter::onBodyEnter()
for ( int i=0; i<nb_children; i++ ) {
ldomNode * child = _element->getChildNode(i);
if ( child->getNodeId() == el_pseudoElem ) {
// ->initNodeStyle() has been done when the element was created;
// as pseudo elements have no children, let's ->initNodeRendMethod()
// now (as done in onBodyExit()).
child->initNodeRendMethod();
// ldomNode::ensurePseudoElement() will always have inserted
// "Before" first, and "After" second. But real children might
// soon be added, and we'll have to move "After" last when done.
// Which will be done in onBodyExit().
if ( child->hasAttribute(attr_After) )
if ( child->hasAttribute(attr_Before) ) {
// The "Before" pseudo element (not part of the XML)
// needs to have its style applied. As it has no
// children, we can also init its rend method.
child->initNodeStyle();
child->initNodeRendMethod();
}
else if ( child->hasAttribute(attr_After) ) {
// For the "After" pseudo element, we need to wait
// for all real children to be added, to move it
// to its right position (last), to init its style
// (because of "content:close-quote", whose nested
// level needs to have seen all previous nodes to
// be accurate) and its rendering method.
// We'll do that in onBodyExit() when called for
// this node.
_pseudoElementAfterChildIndex = i;
}
}
}
}
Expand Down Expand Up @@ -5098,9 +5109,18 @@ void ldomNode::ensurePseudoElement( bool is_before ) {
lUInt16 attribute_id = is_before ? attr_Before : attr_After;
pseudo->setAttributeValue(LXML_NS_NONE, attribute_id, L"");
// We are called by lvrend.cpp setNodeStyle(), after the parent
// style and font have been fully set up.
// We can set this pseudo element style as it can now properly inherit.
pseudo->initNodeStyle();
// style and font have been fully set up. We could set this pseudo
// element style with pseudo->initNodeStyle(), as it can inherit
// properly, but we should not:
// - when re-rendering, initNodeStyleRecursive()/updateStyleDataRecursive()
// will iterate thru this node we just added as a child, and do it.
// - when XML loading, we could do it for the "Before" pseudo element,
// but for the "After" one, we need to wait for all real children to be
// added and have their style applied - just because they can change
// open-quote/close-quote nesting levels - to be sure we get the
// proper nesting level quote char for the After node.
// So, for the XML loading phase, we do that in onBodyEnter() and
// onBodyExit() when called on the parent node.
}
}

Expand Down Expand Up @@ -6726,10 +6746,16 @@ void ldomElementWriter::onBodyExit()
if ( _pseudoElementAfterChildIndex >= 0 ) {
if ( _pseudoElementAfterChildIndex != _element->getChildCount()-1 ) {
// Not the last child: move it there
// printf("moving After from %d to %d\n", _pseudoElementAfterChildIndex, _element->getChildCount()-1);
// moveItemsTo() just works to remove it, and re-add it (so, adding it at the end)
// (moveItemsTo() works just fine when the source node is also the
// target node: remove it, and re-add it, so, adding it at the end)
_element->moveItemsTo( _element, _pseudoElementAfterChildIndex, _pseudoElementAfterChildIndex);
}
// Now that all the real children of this node have had their
// style set, we can init the style of the "After" pseudo
// element, and its rend method as it has no children.
ldomNode * child = _element->getChildNode(_element->getChildCount()-1);
child->initNodeStyle();
child->initNodeRendMethod();
}
// if ( _element->getStyle().isNull() ) {
// lString16 path;
Expand Down
Loading

0 comments on commit ac882d9

Please sign in to comment.