From 2ab925754ccf7e275d2bc9117e9c751d8cfe71ba Mon Sep 17 00:00:00 2001 From: poire-z Date: Thu, 4 Jun 2020 16:44:56 +0200 Subject: [PATCH 01/11] GIF decoding: avoid crash on some images With some images, we would be writing outside rev_buf array bounds. That's supposed to be driven by some other data, and should not happen - so there might be a bug somewhere else and we might have crap image data. Anyway, avoid this crash. https://github.com/koreader/koreader/issues/6215 --- crengine/src/lvimg.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crengine/src/lvimg.cpp b/crengine/src/lvimg.cpp index 6f839b502..2ca9794cd 100644 --- a/crengine/src/lvimg.cpp +++ b/crengine/src/lvimg.cpp @@ -1294,7 +1294,7 @@ class CLZWDecoder do { rev_buf[pos++] = str_table[code]; code = str_nextchar[code]; - } while (code>=0); + } while (code>=0 && pos < LSWDECODER_MAX_TABLE_SIZE/2); while (--pos>=0) { if (!WriteOutChar(rev_buf[pos])) return 0; @@ -1442,7 +1442,7 @@ class CLZWDecoder // return 0; // table overflow {} // Ignore table overflow, which seems ok, and done by Pillow: - // https://github.com/python-pillow/Pillow/blob/master/src/libImaging/GifDecode.c#L234-L251 + // https://github.com/python-pillow/Pillow/blob/ae43af61/src/libImaging/GifDecode.c#L234-L251 // which is fine handling this image: // https://cms-assets.tutsplus.com/uploads/users/30/posts/19890/image/hanging-punctuation-example.gif // (Aborting on table overflow, we would fail while in the middle From 8c191ef8bcadf544e700cb68d3423360ea30648c Mon Sep 17 00:00:00 2001 From: poire-z Date: Thu, 4 Jun 2020 16:44:58 +0200 Subject: [PATCH 02/11] Top progress bar: avoid re-computing when not needed LVDocView::getSectionBounds(), used to compute marks to show in the top progress bar, which isn't cheap, could be called (with KOReader) on each page turn. Have its cached result trashed only when a re-rendering is really done. Note that m_imageCache might be used by some frontends, and not by others. 
--- crengine/include/lvtinydom.h | 6 +++--- crengine/src/lvdocview.cpp | 12 +++++++----- crengine/src/lvtinydom.cpp | 10 +++++++--- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/crengine/include/lvtinydom.h b/crengine/include/lvtinydom.h index 4e69f53da..b65867092 100755 --- a/crengine/include/lvtinydom.h +++ b/crengine/include/lvtinydom.h @@ -2496,9 +2496,9 @@ class ldomDocument : public lxmlDocBase virtual ~ldomDocument(); #if BUILD_LITE!=1 bool isRendered() { return _rendered; } - /// renders (formats) document in memory - virtual int render( LVRendPageList * pages, LVDocViewCallback * callback, int width, int dy, bool showCover, int y0, font_ref_t def_font, int def_interline_space, CRPropRef props ); - /// renders (formats) document in memory + /// renders (formats) document in memory: returns true if re-rendering needed, false if not + virtual bool render( LVRendPageList * pages, LVDocViewCallback * callback, int width, int dy, bool showCover, int y0, font_ref_t def_font, int def_interline_space, CRPropRef props ); + /// set global rendering properties virtual bool setRenderProps( int width, int dy, bool showCover, int y0, font_ref_t def_font, int def_interline_space, CRPropRef props ); #endif /// create xpointer from pointer string diff --git a/crengine/src/lvdocview.cpp b/crengine/src/lvdocview.cpp index 371c49a1f..a5ec05bc3 100755 --- a/crengine/src/lvdocview.cpp +++ b/crengine/src/lvdocview.cpp @@ -544,7 +544,6 @@ void LVDocView::clearImageCache() { #if CR_ENABLE_PAGE_IMAGE_CACHE==1 m_imageCache.clear(); #endif - m_section_bounds_valid = false; if (m_callback != NULL) m_callback->OnImageCacheClear(); } @@ -2768,9 +2767,9 @@ void LVDocView::Render(int dx, int dy, LVRendPageList * pages) { CRLog::debug("Render(width=%d, height=%d, fontSize=%d, currentFontSize=%d, 0 char width=%d)", dx, dy, m_font_size, m_font->getSize(), m_font->getCharWidth('0')); //CRLog::trace("calling render() for document %08X font=%08X", (unsigned int)m_doc, (unsigned 
int)m_font.get() ); - m_doc->render(pages, isDocumentOpened() ? m_callback : NULL, dx, dy, - m_showCover, m_showCover ? dy + m_pageMargins.bottom * 4 : 0, - m_font, m_def_interline_space, m_props); + bool did_rerender = m_doc->render(pages, isDocumentOpened() ? m_callback : NULL, dx, dy, + m_showCover, m_showCover ? dy + m_pageMargins.bottom * 4 : 0, + m_font, m_def_interline_space, m_props); #if 0 // For debugging lvpagesplitter.cpp (small books) @@ -2790,7 +2789,10 @@ void LVDocView::Render(int dx, int dy, LVRendPageList * pages) { fclose(f); } #endif - fontMan->gc(); + if ( did_rerender ) { + m_section_bounds_valid = false; + fontMan->gc(); + } m_is_rendered = true; //CRLog::debug("Making TOC..."); //makeToc(); diff --git a/crengine/src/lvtinydom.cpp b/crengine/src/lvtinydom.cpp index fe595667d..1780b6a85 100644 --- a/crengine/src/lvtinydom.cpp +++ b/crengine/src/lvtinydom.cpp @@ -4422,7 +4422,7 @@ bool ldomDocument::parseStyleSheet(lString16 cssFile) return parser.Parse(cssFile); } -int ldomDocument::render( LVRendPageList * pages, LVDocViewCallback * callback, int width, int dy, bool showCover, int y0, font_ref_t def_font, int def_interline_space, CRPropRef props ) +bool ldomDocument::render( LVRendPageList * pages, LVDocViewCallback * callback, int width, int dy, bool showCover, int y0, font_ref_t def_font, int def_interline_space, CRPropRef props ) { CRLog::info("Render is called for width %d, pageHeight=%d, fontFace=%s, docFlags=%d", width, dy, def_font->getTypeFace().c_str(), getDocFlags() ); CRLog::trace("initializing default style..."); @@ -4565,7 +4565,10 @@ int ldomDocument::render( LVRendPageList * pages, LVDocViewCallback * callback, //persist(); dumpStatistics(); - return height; + + return true; // full (re-)rendering done + // return height; + } else { CRLog::info("rendering context is not changed - no render!"); if ( _pagesData.pos() ) { @@ -4577,7 +4580,8 @@ int ldomDocument::render( LVRendPageList * pages, LVDocViewCallback * callback, if ( 
was_just_rendered_from_cache && callback ) callback->OnDocumentReady(); - return getFullHeight(); + return false; // no (re-)rendering needed + // return getFullHeight(); } } From 43366af2f70dff33858d58838f18c620966b0839 Mon Sep 17 00:00:00 2001 From: poire-z Date: Thu, 4 Jun 2020 16:45:00 +0200 Subject: [PATCH 03/11] Top progress bar: allow external filling of marks crengine builds its top progress bar markers from the start of each DocFragment (each HTML file in an EPUB). This will allow KOReader to manage it and fill it with markers made from the TOC, similarly to its bottom bar. --- crengine/include/lvdocview.h | 3 ++- crengine/src/lvdocview.cpp | 11 ++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/crengine/include/lvdocview.h b/crengine/include/lvdocview.h index 718b55541..3e86240a4 100755 --- a/crengine/include/lvdocview.h +++ b/crengine/include/lvdocview.h @@ -329,6 +329,7 @@ class LVDocView : public CacheLoadingCallback LVArray m_section_bounds; bool m_section_bounds_valid; + bool m_section_bounds_externally_updated; LVMutex _mutex; #if CR_ENABLE_PAGE_IMAGE_CACHE==1 @@ -627,7 +628,7 @@ class LVDocView : public CacheLoadingCallback /// returns true if document is opened bool isDocumentOpened(); /// returns section bounds, in 1/100 of percent - LVArray & getSectionBounds( ); + LVArray & getSectionBounds( bool for_external_update=false ); /// sets battery state virtual bool setBatteryState( int newState ); /// returns battery state diff --git a/crengine/src/lvdocview.cpp b/crengine/src/lvdocview.cpp index a5ec05bc3..892d2a0ea 100755 --- a/crengine/src/lvdocview.cpp +++ b/crengine/src/lvdocview.cpp @@ -167,6 +167,7 @@ LVDocView::LVDocView(int bitsPerPixel, bool noDefaultDocument) : #if CR_INTERNAL_PAGE_ORIENTATION==1 , m_rotateAngle(CR_ROTATE_ANGLE_0) #endif + , m_section_bounds_externally_updated(false) , m_section_bounds_valid(false), m_doc_format(doc_format_none), m_callback(NULL), m_swapDone(false), m_drawBufferBits( 
GRAY_BACKBUFFER_BITS) { @@ -1489,7 +1490,15 @@ void LVDocView::drawBatteryState(LVDrawBuf * drawbuf, const lvRect & batteryRc, } /// returns section bounds, in 1/100 of percent -LVArray & LVDocView::getSectionBounds() { +LVArray & LVDocView::getSectionBounds( bool for_external_update ) { + if (for_external_update || m_section_bounds_externally_updated) { + // Progress bar markes will be externally updated: we don't care + // about m_section_bounds_valid and we never trash it here. + // It's the frontend responsability to notice it needs some + // update and to update it. + m_section_bounds_externally_updated = true; + return m_section_bounds; + } if (m_section_bounds_valid) return m_section_bounds; m_section_bounds.clear(); From ff81068fda85ee73f094682865b298ab73e139ee Mon Sep 17 00:00:00 2001 From: poire-z Date: Thu, 4 Jun 2020 16:45:02 +0200 Subject: [PATCH 04/11] CSS/Text: properly inherit and handle text-align-last --- crengine/include/cssdef.h | 3 +- crengine/src/lvrend.cpp | 11 ++++++-- crengine/src/lvstsheet.cpp | 3 ++ crengine/src/lvtextfm.cpp | 56 ++++++++++++++++++++++---------------- crengine/src/lvtinydom.cpp | 2 +- 5 files changed, 46 insertions(+), 29 deletions(-) diff --git a/crengine/include/cssdef.h b/crengine/include/cssdef.h index c1a1f977f..1ce591697 100644 --- a/crengine/include/cssdef.h +++ b/crengine/include/cssdef.h @@ -61,7 +61,8 @@ enum css_text_align_t { css_ta_center, css_ta_justify, css_ta_start, // = left if LTR, right if RTL - css_ta_end // = right if LTR, left if LTR + css_ta_end, // = right if LTR, left if LTR + css_ta_auto // only accepted with text-align-last }; /// vertical-align property values diff --git a/crengine/src/lvrend.cpp b/crengine/src/lvrend.cpp index a8f58406f..859b50797 100755 --- a/crengine/src/lvrend.cpp +++ b/crengine/src/lvrend.cpp @@ -2162,6 +2162,7 @@ int styleToTextFmtFlags( const css_style_ref_t & style, int oldflags, int direct case css_ta_end: flg |= (direction == REND_DIRECTION_RTL ? 
LTEXT_ALIGN_LEFT : LTEXT_ALIGN_RIGHT); break; + case css_ta_auto: // shouldn't happen (only accepted with text-align-last) case css_ta_inherit: break; } @@ -2177,14 +2178,15 @@ int styleToTextFmtFlags( const css_style_ref_t & style, int oldflags, int direct flg |= LTEXT_LAST_LINE_ALIGN_CENTER; break; case css_ta_justify: - flg |= LTEXT_LAST_LINE_ALIGN_LEFT; + flg |= LTEXT_LAST_LINE_ALIGN_WIDTH; break; case css_ta_start: - flg |= (direction == REND_DIRECTION_RTL ? LTEXT_ALIGN_RIGHT : LTEXT_ALIGN_LEFT); + flg |= (direction == REND_DIRECTION_RTL ? LTEXT_LAST_LINE_ALIGN_RIGHT : LTEXT_LAST_LINE_ALIGN_LEFT); break; case css_ta_end: - flg |= (direction == REND_DIRECTION_RTL ? LTEXT_ALIGN_LEFT : LTEXT_ALIGN_RIGHT); + flg |= (direction == REND_DIRECTION_RTL ? LTEXT_LAST_LINE_ALIGN_LEFT : LTEXT_LAST_LINE_ALIGN_RIGHT); break; + case css_ta_auto: // let flg have none of the above set, which will mean "auto" case css_ta_inherit: break; } @@ -2949,6 +2951,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce flags |= (is_rtl ? LTEXT_ALIGN_LEFT : LTEXT_ALIGN_RIGHT); break; case css_ta_inherit: + case css_ta_auto: break; } } @@ -3209,6 +3212,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce baseflags |= (is_rtl ? LTEXT_ALIGN_LEFT : LTEXT_ALIGN_RIGHT); break; case css_ta_inherit: + case css_ta_auto: break; } // Among inline nodes, only
can carry a "clear: left/right/both". @@ -8808,6 +8812,7 @@ void setNodeStyle( ldomNode * enode, css_style_ref_t parent_style, LVFontRef par } UPDATE_STYLE_FIELD( white_space, css_ws_inherit ); UPDATE_STYLE_FIELD( text_align, css_ta_inherit ); + UPDATE_STYLE_FIELD( text_align_last, css_ta_inherit ); UPDATE_STYLE_FIELD( text_decoration, css_td_inherit ); UPDATE_STYLE_FIELD( text_transform, css_tt_inherit ); UPDATE_STYLE_FIELD( hyphenate, css_hyph_inherit ); diff --git a/crengine/src/lvstsheet.cpp b/crengine/src/lvstsheet.cpp index 89e3eee2c..42c961159 100644 --- a/crengine/src/lvstsheet.cpp +++ b/crengine/src/lvstsheet.cpp @@ -919,6 +919,7 @@ static const char * css_ta_names[] = "justify", "start", "end", + "auto", NULL }; @@ -1382,6 +1383,8 @@ bool LVCssDeclaration::parse( const char * &decl, bool higher_importance, lxmlDo break; case cssd_text_align: n = parse_name( decl, css_ta_names, -1 ); + if ( n == css_ta_auto ) // only accepted with text-align-last + n = -1; break; case cssd_text_align_last: n = parse_name( decl, css_ta_names, -1 ); diff --git a/crengine/src/lvtextfm.cpp b/crengine/src/lvtextfm.cpp index 766f854b8..e8d628525 100755 --- a/crengine/src/lvtextfm.cpp +++ b/crengine/src/lvtextfm.cpp @@ -2136,35 +2136,43 @@ class LVFormatter { // Find out text alignment to ensure for this line int align = para->flags & LTEXT_FLAG_NEWLINE; - TR("addLine(%d, %d) y=%d align=%d", start, end, m_y, align); - // printf("addLine(%d, %d) y=%d align=%d maxWidth=%d\n", start, end, m_y, align, maxWidth); - // For some reason, text_align_last inheritance is not ensured in lvrend.cpp, - // may be to be able to kill justification for the last (or a single) line as - // easily as what follows below. - // Here, text_align_last = 0 when it has not explicitely been set by the style - // of the erm_final node. 
- int text_align_last = (para->flags >> LTEXT_LAST_LINE_ALIGN_SHIFT) & LTEXT_FLAG_NEWLINE; - if ( last && !first && align==LTEXT_ALIGN_WIDTH && text_align_last!=0 ) - align = text_align_last; - else if ( align==LTEXT_ALIGN_WIDTH && last ) { - // text-align-last: not specified, justification is in use, and this line - // is the last (or a single line): align it to the left. - align = LTEXT_ALIGN_LEFT; - // Unless fribidi detected this paragraph is RTL: align it to the right - if ( m_para_dir_is_rtl ) - align = LTEXT_ALIGN_RIGHT; - } - if ( preFormattedOnly || !align ) - align = LTEXT_ALIGN_LEFT; - if ( last && !first ) { // Last line of paragraph (when not a single line paragraph) - int last_align = (para->flags>>16) & LTEXT_FLAG_NEWLINE; - if ( last_align ) + + // Note that with Firefox, text-align-last applies to the first line when + // it is also the last (so, it is used for a single line paragraph). + // Also, when "text-align-last: justify", Firefox does justify the last + // (or single) line. + if ( last ) { // Last line of paragraph, or single line paragraph + // https://drafts.csswg.org/css-text-3/#text-align-last-property + // "If 'auto' is specified, content on the affected line is aligned + // per text-align-all unless text-align-all is set to justify, + // in which case it is start-aligned. All other values are + // interpreted as described for text-align. " + int last_align = (para->flags >> LTEXT_LAST_LINE_ALIGN_SHIFT) & LTEXT_FLAG_NEWLINE; + if ( last_align ) { + // specified (or inherited) to something other than 'auto': use it align = last_align; + } + else { // text-align-last: auto (inherited default) + // Keep using value from text-align, except when it is set to 'justify' + if ( align == LTEXT_ALIGN_WIDTH ) { + // Justification is in use, and this line is the last + // (or a single line): align it to the left (or to the + // right if FriBiDi detected this paragraph is RTL) + align = m_para_dir_is_rtl ? 
LTEXT_ALIGN_RIGHT : LTEXT_ALIGN_LEFT; + } + } } + // Override it for PRE lines (or in case align has not been set) + if ( preFormattedOnly || !align ) + align = m_para_dir_is_rtl ? LTEXT_ALIGN_RIGHT : LTEXT_ALIGN_LEFT; + + TR("addLine(%d, %d) y=%d align=%d", start, end, m_y, align); + // printf("addLine(%d, %d) y=%d align=%d maxWidth=%d\n", start, end, m_y, align, maxWidth); + // Note: in the code and comments, all these mean the same thing: // visual alignment enabled, floating punctuation, hanging punctuation - bool visualAlignmentEnabled = gFlgFloatingPunctuationEnabled!=0 && (align == LTEXT_ALIGN_WIDTH || align == LTEXT_ALIGN_RIGHT ||align==LTEXT_ALIGN_LEFT); + bool visualAlignmentEnabled = (gFlgFloatingPunctuationEnabled != 0) && (align != LTEXT_ALIGN_CENTER); // Note: parameter needReduceSpace and variable splitBySpaces (which // was always true) have been removed, as we always split by space: diff --git a/crengine/src/lvtinydom.cpp b/crengine/src/lvtinydom.cpp index 1780b6a85..64918eef9 100644 --- a/crengine/src/lvtinydom.cpp +++ b/crengine/src/lvtinydom.cpp @@ -4254,7 +4254,7 @@ bool ldomDocument::setRenderProps( int width, int dy, bool /*showCover*/, int /* s->display = css_d_block; s->white_space = css_ws_normal; s->text_align = css_ta_start; - s->text_align_last = css_ta_start; + s->text_align_last = css_ta_auto; s->text_decoration = css_td_none; s->text_transform = css_tt_none; s->hyphenate = css_hyph_auto; From 865ad230f3decc50d15750dbb4051601c39a7e5d Mon Sep 17 00:00:00 2001 From: poire-z Date: Thu, 4 Jun 2020 16:45:04 +0200 Subject: [PATCH 05/11] getRenderedWidths(): fix handling of text-indent Also properly measure table captions as they are just like erm_final nodes. 
--- crengine/src/lvrend.cpp | 42 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/crengine/src/lvrend.cpp b/crengine/src/lvrend.cpp index 859b50797..0b4d4d4fd 100755 --- a/crengine/src/lvrend.cpp +++ b/crengine/src/lvrend.cpp @@ -9176,10 +9176,10 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct maxWidth = curMaxWidth; if (curWordWidth > minWidth) minWidth = curWordWidth; - // First word after a
should not have positive text-indent in its width, + // First word after a
should not have text-indent in its width, // but we did reset 'indent' to 0 after the first word of the final block. - // If we get some non-zero indent here, it is actually negated negative indent - // that should be applied to all words, including the one after a
, and + // If we get some non-zero indent here, it is "hanging" indent, that + // should be applied to all words, including the one after a
, and // so it should contribute to the new line full width (curMaxWidth). curMaxWidth = indent; curWordWidth = indent; @@ -9277,7 +9277,8 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct _maxWidth = lengthToPx( style_width, 0, em ); _minWidth = _maxWidth; } - else if (m == erm_final) { // Block node that contains only inline or text nodes: + else if (m == erm_final || m == erm_table_caption) { + // Block node that contains only inline or text nodes if ( is_img ) { // img with display: block always become erm_final (never erm_block) if (img_width > 0) { // block img with a fixed width _maxWidth = img_width; @@ -9285,28 +9286,20 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct } } else { + // curMaxWidth and curWordWidth are not used in our parents (which + // are block-like elements), we can just reset them. + curMaxWidth = 0; + curWordWidth = 0; // We don't have any width yet to use for text-indent in % units, // but this is very rare - use em as we must use something int em = node->getFont()->getSize(); indent = lengthToPx(style->text_indent, em, em); - // curMaxWidth and curWordWidth are not used in our parents (which - // are block-like elements), we can just reset them. - // First word will have text-indent has its width + // First word will have text-indent as part of its width if ( style->text_indent.value & 0x00000001 ) { - // lvstsheet sets the lowest bit to 1 when text-indent has the "hanging" keyword, - // which will be handled like negative margins - indent = -indent; - } - if ( indent >= 0 ) { - // Positive indent applies only on the first line, so account - // for it only on the first word. - curMaxWidth = indent; - curWordWidth = indent; - indent = 0; // but no more on following words in this final node, even after
- } - else { - // Negative indent does not apply on the first word, but may apply on each - // followup word if a wrap happens before thema so don't reset it. + // lvstsheet sets the lowest bit to 1 when text-indent has the "hanging" keyword. + // "hanging" means it should apply on all line except the first. + // Hanging indent does not apply on the first word, but may apply on each + // followup word if a wrap happens before them so don't reset it. // To keep things simple and readable here, we only apply it to the first // word after a
- but it should really apply on each word, everytime // we reset curWordWidth, which would make the below code quite ugly and @@ -9315,6 +9308,13 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct // (We don't handle the shift/overlap with padding that a real negative // indent can cause - so, we may return excessive widths.) } + else { + // Not-"hanging" positive or negative indent applies only on the first line, + // so account for it only on the first word. + curMaxWidth += indent; + curWordWidth += indent; + indent = 0; // but no more on following words in this final node, even after
+ } if (list_marker_width > 0 && !list_marker_width_as_padding) { // with additional list marker if list-style-position: inside curMaxWidth += list_marker_width; From 204ed9b796e0c173366c92f3623941322023a646 Mon Sep 17 00:00:00 2001 From: poire-z Date: Thu, 4 Jun 2020 16:45:06 +0200 Subject: [PATCH 06/11] Reorder some flags to make the sets clearer --- crengine/include/lvfnt.h | 18 ++++-- crengine/include/lvfntman.h | 9 ++- crengine/include/lvrend.h | 4 +- crengine/include/lvtextfm.h | 114 ++++++++++++++++++++---------------- crengine/src/lvfntman.cpp | 4 +- crengine/src/lvrend.cpp | 22 ++++--- crengine/src/lvtinydom.cpp | 2 +- 7 files changed, 96 insertions(+), 77 deletions(-) diff --git a/crengine/include/lvfnt.h b/crengine/include/lvfnt.h index 543cdb919..808e6a135 100644 --- a/crengine/include/lvfnt.h +++ b/crengine/include/lvfnt.h @@ -238,6 +238,7 @@ lUInt16 lvfontMeasureText( const lvfont_handle pfont, // (This one is actually not set by lvfntman) #define LCHAR_LOCKED_SPACING 0x0040 ///< flag: forbid any letter spacing tweak on this char // (for cursive scripts like arabic, and special cases) +#define LCHAR__AVAILABLE_BIT_08__ 0x0080 /// The next ones, not fitting in a lUInt8, should only be set and used by lvtextfm #define LCHAR_IS_OBJECT 0x0100 ///< flag: this char is object (image, float) @@ -245,17 +246,22 @@ lUInt16 lvfontMeasureText( const lvfont_handle pfont, #define LCHAR_IS_TO_IGNORE 0x0400 ///< flag: this char is to be ignored/skipped in text measurement and drawing #define LCHAR_IS_RTL 0x0800 ///< flag: this char is part of a RTL segment -// (Next ones are not yet used and can be removed/changed) -#define LCHAR_IS_CJK_NOT_PUNCT 0x1000 ///< flag: this char is part a CJK char but not a punctuation -#define LCHAR_IS_CJK_LEFT_PUNCT 0x2000 ///< flag: this char is part a CJK left punctuation -#define LCHAR_IS_CJK_RIGHT_PUNCT 0x4000 ///< flag: this char is part a CJK right punctuation +#define LCHAR__AVAILABLE_BIT_13__ 0x1000 +#define 
LCHAR__AVAILABLE_BIT_14__ 0x2000 +#define LCHAR__AVAILABLE_BIT_15__ 0x4000 +#define LCHAR__AVAILABLE_BIT_16__ 0x8000 -#define LCHAR_IS_CJK_PUNCT 0x6000 ///< flag: (for checking) this char is a CJK punctuation (neutral if set) -#define LCHAR_IS_CJK 0x7000 ///< flag: (for checking) this char is a CJK char +// Some idea, if needed: +// #define LCHAR_IS_CJK_NOT_PUNCT 0x1000 ///< flag: this char is part a CJK char but not a punctuation +// #define LCHAR_IS_CJK_LEFT_PUNCT 0x2000 ///< flag: this char is part a CJK left punctuation +// #define LCHAR_IS_CJK_RIGHT_PUNCT 0x4000 ///< flag: this char is part a CJK right punctuation +// #define LCHAR_IS_CJK_PUNCT 0x6000 ///< flag: (for checking) this char is a CJK punctuation (neutral if set) +// #define LCHAR_IS_CJK 0x7000 ///< flag: (for checking) this char is a CJK char // LCHAR_IS_EOL was not used by any code, and has been replaced by LCHAR_IS_CLUSTER_TAIL // #define LCHAR_IS_EOL 0x0010 ///< flag: this char is CR or LF + /** \brief returns true if character is unicode space \param code is character \return 1 if character is space, 0 otherwise diff --git a/crengine/include/lvfntman.h b/crengine/include/lvfntman.h index 7282a92f1..b54b11613 100644 --- a/crengine/include/lvfntman.h +++ b/crengine/include/lvfntman.h @@ -215,11 +215,10 @@ enum kerning_mode_t { #define LFNT_HINT_IS_FALLBACK_FONT 0x0010 /// set on recursive Harfbuzz rendering/drawing with a fallback font // These 4 translate from LTEXT_TD_* equivalents (see lvtextfm.h). Keep them in sync. 
-#define LFNT_DRAW_UNDERLINE 0x0100 /// underlined text -#define LFNT_DRAW_OVERLINE 0x0200 /// overlined text -#define LFNT_DRAW_LINE_THROUGH 0x0400 /// striked through text -#define LFNT_DRAW_BLINK 0x0800 /// blinking text (implemented as underline) -#define LFNT_DRAW_DECORATION_MASK 0x0F00 +#define LFNT_DRAW_UNDERLINE 0x1000 /// underlined text +#define LFNT_DRAW_OVERLINE 0x2000 /// overlined text +#define LFNT_DRAW_LINE_THROUGH 0x4000 /// striked through text +#define LFNT_DRAW_DECORATION_MASK 0x7000 // CSS font-variant and font-feature-settings properties: diff --git a/crengine/include/lvrend.h b/crengine/include/lvrend.h index d5cbdc8e0..ad4c5fd49 100644 --- a/crengine/include/lvrend.h +++ b/crengine/include/lvrend.h @@ -119,9 +119,9 @@ void initFormatData( ldomNode * node ); /// initializes rendering method for node int initRendMethod( ldomNode * node, bool recurseChildren, bool allowAutoboxing ); /// converts style to text formatting API flags -int styleToTextFmtFlags( const css_style_ref_t & style, int oldflags, int direction=REND_DIRECTION_UNSET ); +lUInt32 styleToTextFmtFlags( const css_style_ref_t & style, lUInt32 oldflags, int direction=REND_DIRECTION_UNSET ); /// renders block as single text formatter object -void renderFinalBlock( ldomNode * node, LFormattedText * txform, RenderRectAccessor * fmt, int & flags, +void renderFinalBlock( ldomNode * node, LFormattedText * txform, RenderRectAccessor * fmt, lUInt32 & flags, int indent, int line_h, TextLangCfg * lang_cfg=NULL, int valign_dy=0, bool * is_link_start=NULL ); /// renders block which contains subblocks (with gRenderBlockRenderingFlags as flags) int renderBlockElement( LVRendPageContext & context, ldomNode * enode, int x, int y, int width, int direction=REND_DIRECTION_UNSET, int * baseline=NULL ); diff --git a/crengine/include/lvtextfm.h b/crengine/include/lvtextfm.h index fd7062344..b8a528ea4 100755 --- a/crengine/include/lvtextfm.h +++ b/crengine/include/lvtextfm.h @@ -27,56 +27,65 @@ extern "C" 
{ #endif // src_text_fragment_t flags -#define LTEXT_ALIGN_LEFT 0x0001 /**< \brief new left-aligned paragraph */ -#define LTEXT_ALIGN_RIGHT 0x0002 /**< \brief new right-aligned paragraph */ -#define LTEXT_ALIGN_CENTER 0x0003 /**< \brief new centered paragraph */ -#define LTEXT_ALIGN_WIDTH 0x0004 /**< \brief new justified paragraph */ -#define LTEXT_LAST_LINE_ALIGN_SHIFT 16 - -#define LTEXT_LAST_LINE_ALIGN_LEFT 0x00010000 /**< \brief last line of justified paragraph should be left-aligned */ -#define LTEXT_LAST_LINE_ALIGN_RIGHT 0x00020000 /**< \brief last line of justified paragraph should be right-aligned */ -#define LTEXT_LAST_LINE_ALIGN_CENTER 0x00030000 /**< \brief last line of justified paragraph should be centered */ -#define LTEXT_LAST_LINE_ALIGN_WIDTH 0x00040000 /**< \brief last line of justified paragraph should be justified */ - - -#define LTEXT_FLAG_NEWLINE 0x0007 /**< \brief new line flags mask */ -#define LTEXT_FLAG_OWNTEXT 0x0008 /**< \brief store local copy of text instead of pointer */ - -#define LTEXT_VALIGN_MASK 0x0070 /**< \brief vertical align flags mask */ -#define LTEXT_VALIGN_BASELINE 0x0000 /**< \brief baseline vertical align */ -#define LTEXT_VALIGN_SUB 0x0010 /**< \brief subscript */ -#define LTEXT_VALIGN_SUPER 0x0020 /**< \brief superscript */ -#define LTEXT_VALIGN_MIDDLE 0x0030 /**< \brief middle */ -#define LTEXT_VALIGN_BOTTOM 0x0040 /**< \brief bottom */ -#define LTEXT_VALIGN_TEXT_BOTTOM 0x0050 /**< \brief text-bottom */ -#define LTEXT_VALIGN_TOP 0x0060 /**< \brief top */ -#define LTEXT_VALIGN_TEXT_TOP 0x0070 /**< \brief text-top */ - -#define LTEXT_TD_UNDERLINE 0x0100 /**< \brief underlined text */ -#define LTEXT_TD_OVERLINE 0x0200 /**< \brief overlined text */ -#define LTEXT_TD_LINE_THROUGH 0x0400 /**< \brief striked through text */ -#define LTEXT_TD_BLINK 0x0800 /**< \brief blinking text */ -#define LTEXT_TD_MASK 0x0F00 /**< \brief text decoration mask */ - // These 4 above translate to LFNT_DRAW_* equivalents (see lvfntman.h). 
Keep them in sync. - -#define LTEXT_SRC_IS_OBJECT 0x8000 /**< \brief object (image) */ -#define LTEXT_IS_LINK 0x4000 /**< \brief link */ -#define LTEXT_HYPHENATE 0x1000 /**< \brief allow hyphenation */ -#define LTEXT_RUNIN_FLAG 0x2000 /**< \brief element display mode is runin */ - -#define LTEXT_FLAG_PREFORMATTED 0x0080 /**< \brief element space mode is preformatted */ - -#define LTEXT_SRC_IS_CLEAR_RIGHT 0x00100000 /**< \brief text follows
*/ -#define LTEXT_SRC_IS_CLEAR_LEFT 0x00200000 /**< \brief text follows
*/ -#define LTEXT_SRC_IS_CLEAR_BOTH 0x00300000 /**< \brief text follows
*/ -#define LTEXT_SRC_IS_CLEAR_LAST 0x00400000 /**< \brief ignorable text, added when nothing follows
*/ - -#define LTEXT_SRC_IS_FLOAT 0x01000000 /**< \brief float:'ing node */ -#define LTEXT_SRC_IS_FLOAT_DONE 0x02000000 /**< \brief float:'ing node (already dealt with) */ -#define LTEXT_SRC_IS_INLINE_BOX 0x04000000 /**< \brief inlineBox wrapping node */ - -#define LTEXT_STRUT_CONFINED 0x08000000 /**< \brief text should not overflow/modify its paragraph strut baseline and height */ +// Text horizontal alignment +#define LTEXT_FLAG_NEWLINE 0x0007 // Mask: next flags are set only on the first fragment following a newline +#define LTEXT_ALIGN_LEFT 0x0001 // left-aligned paragraph +#define LTEXT_ALIGN_RIGHT 0x0002 // right-aligned paragraph +#define LTEXT_ALIGN_CENTER 0x0003 // centered paragraph +#define LTEXT_ALIGN_WIDTH 0x0004 // justified paragraph +#define LTEXT_LAST_LINE_ALIGN_SHIFT 4 // Shift to map the following flags to the previous ones +#define LTEXT_LAST_LINE_ALIGN_LEFT 0x0010 // last line of justified paragraph should be left-aligned +#define LTEXT_LAST_LINE_ALIGN_RIGHT 0x0020 // last line of justified paragraph should be right-aligned +#define LTEXT_LAST_LINE_ALIGN_CENTER 0x0030 // last line of justified paragraph should be centered +#define LTEXT_LAST_LINE_ALIGN_WIDTH 0x0040 // last line of justified paragraph should be justified + +// Text vertical alignment +#define LTEXT_VALIGN_MASK 0x0700 // vertical align flags mask +#define LTEXT_VALIGN_BASELINE 0x0000 // baseline vertical align +#define LTEXT_VALIGN_SUB 0x0100 // subscript +#define LTEXT_VALIGN_SUPER 0x0200 // superscript +#define LTEXT_VALIGN_MIDDLE 0x0300 // middle +#define LTEXT_VALIGN_BOTTOM 0x0400 // bottom +#define LTEXT_VALIGN_TEXT_BOTTOM 0x0500 // text-bottom +#define LTEXT_VALIGN_TOP 0x0600 // top +#define LTEXT_VALIGN_TEXT_TOP 0x0700 // text-top +#define LTEXT_STRUT_CONFINED 0x0800 // text should not overflow/modify its paragraph strut baseline and height + +// Text decoration +#define LTEXT_TD_MASK 0x7000 // text decoration mask +#define LTEXT_TD_UNDERLINE 0x1000 // underlined text 
+#define LTEXT_TD_OVERLINE 0x2000 // overlined text +#define LTEXT_TD_LINE_THROUGH 0x4000 // striked through text + // These 3 above translate to LFNT_DRAW_* equivalents (see lvfntman.h). Keep them in sync. + +// (Don't waste the 4th bit not used in the 4-bits sets above) +#define LTEXT_FLAG_OWNTEXT 0x0008 // store local copy of text instead of pointer +#define LTEXT_IS_LINK 0x0080 // source text is a link (to gather in-page footnotes) +#define LTEXT_RUNIN_FLAG 0x8000 // element display mode is runin (used with FB2 footnotes) + +// Text white-space and hyphenation handling +#define LTEXT_FLAG_PREFORMATTED 0x00010000 // text is preformatted (white-space: pre, pre-wrap, break-spaces) +#define LTEXT_FLAG_NOWRAP 0x00020000 // text does not allow wrap (white-space: nowrap) +#define LTEXT_HYPHENATE 0x00040000 // allow hyphenation +#define LTEXT__AVAILABLE_BIT_20__ 0x00080000 + +// Source object type (when source is not a text node) +#define LTEXT_SRC_IS_OBJECT 0x00100000 // object (image) +#define LTEXT_SRC_IS_INLINE_BOX 0x00200000 // inlineBox wrapping node +#define LTEXT_SRC_IS_FLOAT 0x00400000 // float:'ing node +#define LTEXT_SRC_IS_FLOAT_DONE 0x00800000 // float:'ing node (already dealt with) +// "clear" handling +#define LTEXT_SRC_IS_CLEAR_RIGHT 0x01000000 // text follows
+#define LTEXT_SRC_IS_CLEAR_LEFT 0x02000000 // text follows a BR element styled "clear: left"
+#define LTEXT_SRC_IS_CLEAR_BOTH 0x03000000 // text follows a BR element styled "clear: both"
+#define LTEXT_SRC_IS_CLEAR_LAST 0x04000000 // ignorable text, added when nothing follows a clearing BR element
+ +#define LTEXT__AVAILABLE_BIT_28__ 0x08000000 +#define LTEXT__AVAILABLE_BIT_29__ 0x10000000 +#define LTEXT__AVAILABLE_BIT_30__ 0x20000000 +#define LTEXT__AVAILABLE_BIT_31__ 0x40000000 +#define LTEXT__AVAILABLE_BIT_32__ 0x80000000 /** \brief Source text line */ @@ -148,6 +157,7 @@ typedef struct #define LTEXT_WORD_IS_LINK_START 0x0010 /// first word of link flag #define LTEXT_WORD_IS_OBJECT 0x0020 /// word is an image #define LTEXT_WORD_IS_INLINE_BOX 0x0040 /// word is a inline-block or inline-table wrapping box +#define LTEXT_WORD__AVAILABLE_BIT_08__ 0x0080 #define LTEXT_WORD_DIRECTION_KNOWN 0x0100 /// word has been thru bidi: if next flag is unset, it is LTR. #define LTEXT_WORD_DIRECTION_IS_RTL 0x0200 /// word is RTL @@ -164,6 +174,7 @@ typedef struct #define LTEXT_WORD_VALIGN_BOTTOM 0x2000 /// word is to be vertical-align: bottom #define LTEXT_WORD_STRUT_CONFINED 0x4000 /// word is to be fully contained into strut bounds /// (used only when one of the 2 previous is set) +#define LTEXT_WORD__AVAILABLE_BIT_16__ 0x8000 //#define LTEXT_BACKGROUND_MARK_FLAGS 0xFFFF0000l @@ -173,6 +184,11 @@ typedef struct #define LTEXT_LINE_IS_BIDI 0x04 #define LTEXT_LINE_PARA_IS_RTL 0x08 +#define LTEXT_LINE__AVAILABLE_BIT_05__ 0x10 +#define LTEXT_LINE__AVAILABLE_BIT_06__ 0x20 +#define LTEXT_LINE__AVAILABLE_BIT_07__ 0x40 +#define LTEXT_LINE__AVAILABLE_BIT_08__ 0x80 + /** \brief Text formatter formatted line */ typedef struct diff --git a/crengine/src/lvfntman.cpp b/crengine/src/lvfntman.cpp index e9aa92602..14b689826 100644 --- a/crengine/src/lvfntman.cpp +++ b/crengine/src/lvfntman.cpp @@ -3089,7 +3089,7 @@ class LVFreeTypeFace : public LVFont x0 -= text_decoration_back_gap; int h = _size > 30 ? 
2 : 1; lUInt32 cl = buf->GetTextColor(); - if ( (flags & LFNT_DRAW_UNDERLINE) || (flags & LFNT_DRAW_BLINK) ) { + if ( flags & LFNT_DRAW_UNDERLINE ) { int liney = y + _baseline + h; buf->FillRect( x0, liney, x, liney+h, cl ); } @@ -3494,7 +3494,7 @@ class LVFontBoldTransform : public LVFont x0 -= text_decoration_back_gap; int h = _size > 30 ? 2 : 1; lUInt32 cl = buf->GetTextColor(); - if ( (flags & LFNT_DRAW_UNDERLINE) || (flags & LFNT_DRAW_BLINK) ) { + if ( flags & LFNT_DRAW_UNDERLINE ) { int liney = y + _baseline + h; buf->FillRect( x0, liney, x, liney+h, cl ); } diff --git a/crengine/src/lvrend.cpp b/crengine/src/lvrend.cpp index 0b4d4d4fd..74e1a8840 100755 --- a/crengine/src/lvrend.cpp +++ b/crengine/src/lvrend.cpp @@ -2132,9 +2132,9 @@ LVFontRef getFont(css_style_rec_t * style, int documentId) return fnt; } -int styleToTextFmtFlags( const css_style_ref_t & style, int oldflags, int direction ) +lUInt32 styleToTextFmtFlags( const css_style_ref_t & style, lUInt32 oldflags, int direction ) { - int flg = oldflags; + lUInt32 flg = oldflags; if ( style->display == css_d_run_in ) { flg |= LTEXT_RUNIN_FLAG; } //else @@ -2304,7 +2304,7 @@ void SplitLines( const lString16 & str, lString16Collection & lines ) // marker_width is updated and can be used to add indent or padding necessary to make // room for the marker (what and how to do it depending of list-style_position (inside/outside) // is left to the caller) -lString16 renderListItemMarker( ldomNode * enode, int & marker_width, LFormattedText * txform, int line_h, int flags ) { +lString16 renderListItemMarker( ldomNode * enode, int & marker_width, LFormattedText * txform, int line_h, lUInt32 flags ) { lString16 marker; marker_width = 0; // The UL > LI parent-child chain may have had some of our boxing elements inserted @@ -2424,7 +2424,7 @@ bool renderAsListStylePositionInside( const css_style_rec_t * style, bool is_rtl // as is to the inline children elements: it is only used to get the width of // the container, 
which is only needed to compute indent (text-indent) values in %, // and to get paragraph direction (LTR/RTL/UNSET). -void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAccessor * fmt, int & baseflags, int indent, int line_h, TextLangCfg * lang_cfg, int valign_dy, bool * is_link_start ) +void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAccessor * fmt, lUInt32 & baseflags, int indent, int line_h, TextLangCfg * lang_cfg, int valign_dy, bool * is_link_start ) { if ( enode->isElement() ) { lvdom_element_render_method rm = enode->getRendMethod(); @@ -2466,7 +2466,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce // - with inline nodes, it only updates LTEXT_FLAG_PREFORMATTED flag when css_ws_pre // - with block nodes (so, only with the first "final" node, and not when // recursing its children which are inline), it will set horitontal alignment flags - int flags = styleToTextFmtFlags( enode->getStyle(), baseflags, direction ); + lUInt32 flags = styleToTextFmtFlags( enode->getStyle(), baseflags, direction ); // Note: // - baseflags (passed by reference) is shared and re-used by this node's siblings // (all inline); it should carry newline/horizontal aligment flag, which should @@ -2742,6 +2742,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce } switch ( style->text_decoration ) { case css_td_underline: + case css_td_blink: // (render it underlined) flags |= LTEXT_TD_UNDERLINE; break; case css_td_overline: @@ -2750,9 +2751,6 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce case css_td_line_through: flags |= LTEXT_TD_LINE_THROUGH; break; - case css_td_blink: - flags |= LTEXT_TD_BLINK; - break; default: break; } @@ -2882,7 +2880,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce bool isBlock = style->display == css_d_block; if ( isBlock ) { // If block image, forget any current flags 
and start from baseflags (?) - int flags = styleToTextFmtFlags( enode->getStyle(), baseflags, direction ); + lUInt32 flags = styleToTextFmtFlags( enode->getStyle(), baseflags, direction ); //txform->AddSourceLine(L"title", 5, 0x000000, 0xffffff, font, baseflags, interval, margin, NULL, 0, 0); LVFont * font = enode->getFont().get(); lUInt32 cl = style->color.type!=css_val_color ? 0xFFFFFFFF : style->color.value; @@ -3259,7 +3257,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce #endif ldomNode * parent = enode->getParentNode(); - int tflags = LTEXT_FLAG_OWNTEXT; + lUInt32 tflags = LTEXT_FLAG_OWNTEXT; // if ( parent->getNodeId() == el_a ) // "123" in 123 would not be flagged if (is_link_start && *is_link_start) { // was propagated from some outer tflags |= LTEXT_IS_LINK; // used to gather in-page footnotes @@ -8143,7 +8141,7 @@ void DrawDocument( LVDrawBuf & drawbuf, ldomNode * enode, int x0, int y0, int dx LFormattedTextRef txform( enode->getDocument()->createFormattedText() ); // If RTL, have the marker aligned to the right inside list_marker_width - int txt_flags = is_rtl ? LTEXT_ALIGN_RIGHT : 0; + lUInt32 txt_flags = is_rtl ? LTEXT_ALIGN_RIGHT : 0; int list_marker_width; lString16 marker = renderListItemMarker( enode, list_marker_width, txform.get(), -1, txt_flags); lUInt32 h = txform->Format( (lUInt16)list_marker_width, (lUInt16)page_height, direction ); @@ -8317,7 +8315,7 @@ void DrawDocument( LVDrawBuf & drawbuf, ldomNode * enode, int x0, int y0, int dx LFormattedTextRef txform( enode->getDocument()->createFormattedText() ); // If RTL, have the marker aligned to the right inside list_marker_width - int txt_flags = is_rtl ? LTEXT_ALIGN_RIGHT : 0; + lUInt32 txt_flags = is_rtl ? 
LTEXT_ALIGN_RIGHT : 0; int list_marker_width; lString16 marker = renderListItemMarker( enode, list_marker_width, txform.get(), -1, txt_flags); lUInt32 h = txform->Format( (lUInt16)list_marker_width, (lUInt16)page_height, direction ); diff --git a/crengine/src/lvtinydom.cpp b/crengine/src/lvtinydom.cpp index 64918eef9..9dcc80272 100644 --- a/crengine/src/lvtinydom.cpp +++ b/crengine/src/lvtinydom.cpp @@ -16461,7 +16461,7 @@ int ldomNode::renderFinalBlock( LFormattedTextRef & frmtext, RenderRectAccessor //RenderRectAccessor fmt( this ); /// render whole node content as single formatted object int direction = RENDER_RECT_PTR_GET_DIRECTION(fmt); - int flags = styleToTextFmtFlags( getStyle(), 0, direction ); + lUInt32 flags = styleToTextFmtFlags( getStyle(), 0, direction ); int lang_node_idx = fmt->getLangNodeIndex(); TextLangCfg * lang_cfg = TextLangMan::getTextLangCfg(lang_node_idx>0 ? getDocument()->getTinyNode(lang_node_idx) : NULL); ::renderFinalBlock( this, f.get(), fmt, flags, 0, -1, lang_cfg ); From 8ebd12dec724207711a2e86795ace068fe8bc408 Mon Sep 17 00:00:00 2001 From: poire-z Date: Thu, 4 Jun 2020 16:45:09 +0200 Subject: [PATCH 07/11] CSS: support more white-space named values Only white-space 'normal' and 'pre' were supported, other values were ignored and handled as 'normal'. This adds support (possibly limited or approximated) for: 'nowrap', 'pre-line', 'pre-wrap' and 'break-spaces'. Fix pre & nowrap handling in text formatting and rendered width measuring.
--- crengine/include/cssdef.h | 44 ++++++++++- crengine/include/lvfnt.h | 3 +- crengine/include/lvtinydom.h | 4 + crengine/src/lstridmap.cpp | 2 +- crengine/src/lvrend.cpp | 89 ++++++++++++++++++--- crengine/src/lvstsheet.cpp | 5 +- crengine/src/lvtextfm.cpp | 148 +++++++++++++++++++++++++++++++---- crengine/src/lvtinydom.cpp | 31 +++++--- 8 files changed, 281 insertions(+), 45 deletions(-) diff --git a/crengine/include/cssdef.h b/crengine/include/cssdef.h index 1ce591697..ce14fe2b0 100644 --- a/crengine/include/cssdef.h +++ b/crengine/include/cssdef.h @@ -45,12 +45,52 @@ enum css_display_t { css_d_none }; -/// white-space property values +// https://www.w3.org/TR/CSS2/text.html#white-space-prop +// https://florian.rivoal.net/talks/line-breaking/ +// https://developer.mozilla.org/en-US/docs/Web/CSS/white-space +// Behaviors: New lines Spaces/tabs End-of-line spaces Text wrap +// normal Collapse Collapse Remove Wrap +// nowrap Collapse Collapse Remove No wrap +// pre-line Preserve Collapse Remove Wrap +// pre Preserve Preserve Preserve No wrap +// pre-wrap Preserve Preserve Hang Wrap +// break-spaces Preserve Preserve Wrap Wrap +// +// crengine ensures the 3 first behaviors at XML parsing time, initially only for: +// 'normal' : replace new lines and tabs by spaces, replace consecutive spaces +// by only one, remove spaces at start and end if "display: block" +// 'pre' : preserve spaces and newlines, expands tabs to 8-spaces tabstops +// A change of the white-space value for a single node will make the DOM stalled, +// and a full reload should be done to get the correct result. +// +// The last behavior (text wrap) happens at text rendering time, and +// we always wrap to fit text into the container or screen width. 
+// +// We can approximate support for the other values: +// 'nowrap' is mostly like 'normal', but need some additional care: +// - in lvtextfm, to prevent wrap where it would be allowed, but +// still accounting for normal wrap points to be used if no other +// non-nowrap text node on the line provides a wrap opportunity. +// - in getRenderedWidths(), where it can impact the widths of +// table cells and floats +// 'pre-line' might be parsed just like 'pre', but rendered just +// like normal (lvtextfm will collapse spaces and wrap on \n) +// 'pre-wrap' is just like 'pre', as we would always wrap to fit +// in the container/screen width +// 'break-spaces' is very similar to 'pre-wrap', except that spaces +// should not be dropped on wrap. We don't ensure that. +// +/// white-space property values: keep them ordered this way for easier checks enum css_white_space_t { css_ws_inherit, css_ws_normal, + css_ws_nowrap, + /* parse XML as 'normal' before this, as 'pre' after this */ + css_ws_pre_line, + /* render text as 'normal' before this, as 'pre' after this */ css_ws_pre, - css_ws_nowrap + css_ws_pre_wrap, + css_ws_break_spaces }; /// text-align property values diff --git a/crengine/include/lvfnt.h b/crengine/include/lvfnt.h index 808e6a135..11f3fa670 100644 --- a/crengine/include/lvfnt.h +++ b/crengine/include/lvfnt.h @@ -227,7 +227,8 @@ lUInt16 lvfontMeasureText( const lvfont_handle pfont, // It is set on soft-hyphen. // It is not set on CJK chars. #define LCHAR_DEPRECATED_WRAP_AFTER 0x0004 ///< flag: line break after this char is possible but deprecated - // It is set on '-' and other unicode hyphens. + // When not using libunibreak: it is set on '-' and other unicode hyphens. + // When using libunibreak: set on all text inside "white-space: nowrap" #define LCHAR_ALLOW_HYPH_WRAP_AFTER 0x0008 ///< flag: line break after this char is allowed with addition of hyphen // It is set by Hyphman when finding hyphenation points in a word. 
#define LCHAR_MANDATORY_NEWLINE 0x0010 ///< flag: this char must start with new line diff --git a/crengine/include/lvtinydom.h b/crengine/include/lvtinydom.h index b65867092..33dcbca77 100755 --- a/crengine/include/lvtinydom.h +++ b/crengine/include/lvtinydom.h @@ -544,6 +544,10 @@ class tinyNodeCollection public: #if BUILD_LITE!=1 + int getSpaceWidthScalePercent() { + return _spaceWidthScalePercent; + } + bool setSpaceWidthScalePercent(int spaceWidthScalePercent) { if (spaceWidthScalePercent == _spaceWidthScalePercent) return false; diff --git a/crengine/src/lstridmap.cpp b/crengine/src/lstridmap.cpp index c712a219d..a75d42f14 100644 --- a/crengine/src/lstridmap.cpp +++ b/crengine/src/lstridmap.cpp @@ -78,7 +78,7 @@ LDOMNameIdMapItem * LDOMNameIdMapItem::deserialize( SerialBuf & buf ) lUInt8 display; lUInt8 white_space; buf >> display >> white_space >> props.allow_text >> props.is_object; - if ( display > css_d_none || white_space > css_ws_nowrap ) + if ( display > css_d_none || white_space > css_ws_break_spaces ) return NULL; props.display = (css_display_t)display; props.white_space = (css_white_space_t)white_space; diff --git a/crengine/src/lvrend.cpp b/crengine/src/lvrend.cpp index 74e1a8840..e2e681b0f 100755 --- a/crengine/src/lvrend.cpp +++ b/crengine/src/lvrend.cpp @@ -2192,8 +2192,15 @@ lUInt32 styleToTextFmtFlags( const css_style_ref_t & style, lUInt32 oldflags, in } } } - if ( style->white_space == css_ws_pre ) + // We should clean these flags that we got from the parent node via baseFlags: + // CSS white-space inheritance is correctly handled via styles (so, no need + // for this alternative way to ensure inheritance with flags), but might have + // been cancelled and set to some other value (e.g.: normal inside pre) + flg &= ~(LTEXT_FLAG_PREFORMATTED|LTEXT_FLAG_NOWRAP); + if ( style->white_space >= css_ws_pre ) // white-space: pre, pre-wrap, break-spaces flg |= LTEXT_FLAG_PREFORMATTED; + if ( style->white_space == css_ws_nowrap ) // white-space: nowrap 
+ flg |= LTEXT_FLAG_NOWRAP; //flg |= oldflags & ~LTEXT_FLAG_NEWLINE; return flg; } @@ -2463,9 +2470,11 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce bool is_rtl = direction == REND_DIRECTION_RTL; // About styleToTextFmtFlags: - // - with inline nodes, it only updates LTEXT_FLAG_PREFORMATTED flag when css_ws_pre - // - with block nodes (so, only with the first "final" node, and not when - // recursing its children which are inline), it will set horitontal alignment flags + // - with inline nodes, it only updates LTEXT_FLAG_PREFORMATTED flag + // when css_ws_pre and LTEXT_FLAG_NOWRAP when css_ws_nowrap. + // - with block nodes (so, only with the first "final" node, and not + // when recursing its children which are inline), it will also set + // horitontal alignment flags. lUInt32 flags = styleToTextFmtFlags( enode->getStyle(), baseflags, direction ); // Note: // - baseflags (passed by reference) is shared and re-used by this node's siblings @@ -9493,6 +9502,12 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct case css_tt_inherit: break; } + // white-space + // When getting min width, ensure non free wrap for "white-space: pre" (even if we + // don't when rendering). Others like "pre-wrap" and "pre-line" are allowed to wrap. + bool nowrap = (parent_style->white_space == css_ws_nowrap) || (parent_style->white_space == css_ws_pre); + bool pre = parent_style->white_space >= css_ws_pre; + int space_width_scale_percent = pre ? 100 : parent->getDocument()->getSpaceWidthScalePercent(); // measure text const lChar16 * txt = nodeText.c_str(); #ifdef DEBUG_GETRENDEREDWIDTHS @@ -9534,24 +9549,33 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct // todo: provide direction and hints #if (USE_LIBUNIBREAK==1) for (int i=0; i0 ? 
widths[i-1] : 0); + if ( (flags[i] & LCHAR_IS_SPACE) && (space_width_scale_percent != 100) ) { + w = w * space_width_scale_percent / 100; + } lChar16 c = *(txt + start + i); lChar16 next_c = *(txt + start + i + 1); // might be 0 at end of string if ( lang_cfg->hasLBCharSubFunc() ) { next_c = lang_cfg->getLBCharSubFunc()(txt+start, i+1, len-1 - (i+1)); } int brk = lb_process_next_char(&lbCtx, (utf32_t)next_c); - // We don't need to bother with collapsing consecutive spaces, as - // we're dealing with a single text node, and the HTML parser has - // removed multiple consecutive spaces (except with PRE, that we - // already did not handle correctly when !USE_LIBUNIBREAK). + // We don't really need to bother with consecutive spaces (that + // should collapse when not 'pre', but libunibreak only allows + // break on the last one, so we would get the leading spaces + // width as part of current word), as we're dealing with a single + // text node, and the HTML parser has removed multiple consecutive + // spaces (except with 'pre', where it looks fine as they don't + // collapse; this might still not be right with pre-wrap though). // printf("between <%c%c>: brk %d\n", c, next_c, brk); - if (brk == LINEBREAK_ALLOWBREAK) { - if (flags[i] & LCHAR_IS_SPACE) { // A space + if (brk == LINEBREAK_ALLOWBREAK && !nowrap) { + if (flags[i] & LCHAR_ALLOW_WRAP_AFTER) { // a breakable/collapsible space (flag set by measureText() if (collapseNextSpace) // ignore this space continue; collapseNextSpace = true; // ignore next spaces, even if in another node - lastSpaceWidth = w; + lastSpaceWidth = pre ? 
0 : w; // Don't remove last space width if 'pre' curMaxWidth += w; // add this space to non-wrap width if (curWordWidth > 0) { // there was a word before this space if (start+i > 0) { @@ -9587,9 +9611,44 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct curMaxWidth += left_overflow; // also add it to max width } } + else if (brk == LINEBREAK_MUSTBREAK) { // \n if pre + // Get done with current word + if (curWordWidth > 0) { // we end with a word + if (start+i > 0) { + // adjust for last word's last char or previous CJK char right overflow + lChar16 prevc = *(txt + start + i - 1); + int right_overflow = - font->getRightSideBearing(prevc, true, true); + curWordWidth += right_overflow; + curMaxWidth += right_overflow; + } + } + // Similar to what's done above on
or at end of final node + if (lastSpaceWidth) + curMaxWidth -= lastSpaceWidth; + if (curMaxWidth > maxWidth) + maxWidth = curMaxWidth; + if (curWordWidth > minWidth) + minWidth = curWordWidth; + // Get ready for next text + curMaxWidth = indent; + curWordWidth = indent; + collapseNextSpace = true; // skip leading spaces + lastSpaceWidth = 0; + } else { // break not allowed: this char is part of a word - collapseNextSpace = false; // next space should not be ignored - lastSpaceWidth = 0; // no width to take off if we stop with this char + // But it can be a space followed by another space (with libunibreak, + // only the last space will get LINEBREAK_ALLOWBREAK). + if (flags[i] & LCHAR_ALLOW_WRAP_AFTER) { // a breakable/collapsible space (flag set by measureText() + if (collapseNextSpace) { // space before (and space after) + continue; // ignore it + } + collapseNextSpace = true; // ignore next ones + lastSpaceWidth = pre ? 0 : w; // Don't remove last space width if 'pre' + } + else { // Not a space + collapseNextSpace = false; // next space should not be ignored + lastSpaceWidth = 0; // no width to take off if we stop with this char + } if (curWordWidth == 0) { // first char of a word // adjust for leading overflow on first char of a word int left_overflow = - font->getLeftSideBearing(c, false, true); @@ -9604,9 +9663,13 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct } } #else // not USE_LIBUNIBREAK==1 + // (This has not been updated to handle nowrap & pre) for (int i=0; i0 ? 
widths[i-1] : 0); lChar16 c = *(txt + start + i); + if ( (flags[i] & LCHAR_IS_SPACE) && (space_width_scale_percent != 100) ) { + w = w * space_width_scale_percent / 100; + } bool is_cjk = (c >= UNICODE_CJK_IDEOGRAPHS_BEGIN && c <= UNICODE_CJK_IDEOGRAPHS_END && ( c<=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_BEGIN || c>=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_END) ); diff --git a/crengine/src/lvstsheet.cpp b/crengine/src/lvstsheet.cpp index 42c961159..e3700c69d 100644 --- a/crengine/src/lvstsheet.cpp +++ b/crengine/src/lvstsheet.cpp @@ -905,8 +905,11 @@ static const char * css_ws_names[] = { "inherit", "normal", - "pre", "nowrap", + "pre-line", + "pre", + "pre-wrap", + "break-spaces", NULL }; diff --git a/crengine/src/lvtextfm.cpp b/crengine/src/lvtextfm.cpp index e8d628525..ae4919d0c 100755 --- a/crengine/src/lvtextfm.cpp +++ b/crengine/src/lvtextfm.cpp @@ -963,13 +963,92 @@ class LVFormatter { int i; bool prev_was_space = true; // start with true, to get rid of all leading spaces int last_non_space_pos = -1; // to get rid of all trailing spaces + src_text_fragment_t * prev_src = NULL; + for ( i=start; isrctext[i]; + + // We will compute wrap rules as if there were no "white-space: nowrap", as + // we might end up not ensuring nowrap. We just flag all chars (but the last + // one) inside a text node with "nowrap" with LCHAR_DEPRECATED_WRAP_AFTER, + // and processParagraph() will deal with chars that have both ALLOW_WRAP_AFTER + // and DEPRECATED_WRAP_AFTER. + bool nowrap = src->flags & LTEXT_FLAG_NOWRAP; + if ( nowrap && pos > 0 ) { + // We still need to do the right thing at boundaries between 2 nodes + // with nowrap - and update flags on the last char of previous node. 
+ // If NOWRAP|NOWRAP: wrap after last char of 1st node is permitted + // If NOWRAP|WRAP : wrap after last char of 1st node is permitted + // If WRAP|NOWRAP: wrap after last char of 1st node is permitted + // If WRAP|WRAP : it depends + bool handled = false; + if ( prev_src && (prev_src->flags & LTEXT_FLAG_NOWRAP) ) { + // We don't have much context about these text nodes. + // 2 consecutive text nodes might both have "white-space: nowrap", + // but it might be allowed to wrap between them if the node that + // contains them isn't "nowrap". + // So, try to do it that way: + // - if both have it, and not their common parent container (so + // it's not inherited): a wrap should be allowed between them. + // - if both have it, and their parent container too, a wrap + // shouldn't be allowed between them + ldomNode * prev_node = (ldomNode *)prev_src->object; + ldomNode * this_node = (ldomNode *)src->object; + if ( prev_node && this_node ) { + ldomXRange r = ldomXRange( ldomXPointer(prev_node,0), ldomXPointer(this_node,0) ); + ldomNode * parent = r.getNearestCommonParent(); + if ( parent && parent->getStyle()->white_space == css_ws_nowrap ) { + m_flags[pos-1] |= LCHAR_DEPRECATED_WRAP_AFTER; + handled = true; + } + } + else { + // One of the 2 nodes is some generated content (list marker, + // quote char, BDI wrapping chars) that does not map to a + // document node (and we can't reach its parent from here). + // Not sure if this would be always good, but let's assume + // we want nowrap continuity. 
+ m_flags[pos-1] |= LCHAR_DEPRECATED_WRAP_AFTER; + handled = true; + } + } + if ( !handled && src->flags & (LTEXT_SRC_IS_INLINE_BOX|LTEXT_SRC_IS_OBJECT) ) { + // Not per-spec, but might be handy: + // If an image or our internal inlineBox element has been set + // to "white-space: nowrap", it's most probably that it has + // inherited it from its parent node - as it's quite unprobable + // in real-life that an image was set to "white-space: nowrap" + // itself, as it would have no purpose. As for inlineBox, + // the original element that has "display: inline-block; + // white-space: nowrap" is actually the child of the inlineBox, + // and will have it - but they are not propagated up to the + // inlineBox wrapper. + // So, assume that if such image or inlineBox has it, while + // its parent does not, it's because it has been set via + // a Style tweak, and that we have used that trick in the + // aim to prevent a wrap around it. libunibreak defaults to + // allowing a wrap on both sides of such replaced elements; + // this allows to easily change this when needed. + // (Use-case seen: book with footnotes links that are + // set "display:inline-block", which libunibreak could + // put at start of line - while we'd rather want them + // stuck to the word they follow). + ldomNode * this_node = (ldomNode *)src->object; + if ( this_node ) { + ldomNode * parent = this_node->getParentNode(); + if ( parent && parent->getStyle()->white_space != css_ws_nowrap ) { + m_flags[pos-1] |= LCHAR_DEPRECATED_WRAP_AFTER; // avoid wrap before it + m_flags[pos] |= LCHAR_DEPRECATED_WRAP_AFTER; // avoid wrap after it + } + } + } + } + if ( src->flags & LTEXT_SRC_IS_FLOAT ) { m_text[pos] = 0; m_srcs[pos] = src; - m_flags[pos] = LCHAR_IS_OBJECT; m_charindex[pos] = FLOAT_CHAR_INDEX; //0xFFFE; + m_flags[pos] = LCHAR_IS_OBJECT; // Note: m_flags was a lUInt8, and there were already 8 LCHAR_IS_* bits/flags // so we couldn't add our own. 
But using LCHAR_IS_OBJECT should not hurt, // as we do the FLOAT tests before it is used. @@ -986,6 +1065,7 @@ class LVFormatter { // with specifically in splitParagraphs() by processEmbeddedBlock(). m_text[pos] = 0; m_srcs[pos] = src; + m_charindex[pos] = INLINEBOX_CHAR_INDEX; //0xFFFD; m_flags[pos] = LCHAR_IS_OBJECT; #if (USE_LIBUNIBREAK==1) // Let libunibreak know there was an object, for the followup text @@ -1000,7 +1080,6 @@ class LVFormatter { #else m_flags[pos] |= LCHAR_ALLOW_WRAP_AFTER; #endif - m_charindex[pos] = INLINEBOX_CHAR_INDEX; //0xFFFD; last_non_space_pos = pos; prev_was_space = false; pos++; @@ -1008,6 +1087,7 @@ class LVFormatter { else if ( src->flags & LTEXT_SRC_IS_OBJECT ) { m_text[pos] = 0; m_srcs[pos] = src; + m_charindex[pos] = OBJECT_CHAR_INDEX; //0xFFFF; m_flags[pos] = LCHAR_IS_OBJECT; #if (USE_LIBUNIBREAK==1) // Let libunibreak know there was an object @@ -1019,7 +1099,6 @@ class LVFormatter { #else m_flags[pos] |= LCHAR_ALLOW_WRAP_AFTER; #endif - m_charindex[pos] = OBJECT_CHAR_INDEX; //0xFFFF; last_non_space_pos = pos; prev_was_space = false; pos++; @@ -1113,7 +1192,10 @@ class LVFormatter { last_non_space_pos = pos; if ( !is_space ) m_allow_strut_confinning = true; - prev_was_space = is_space; + prev_was_space = is_space || (c == '\n'); + // We might meet '\n' in PRE text, which shouldn't make any space + // collapsed - except when "white-space: pre-line". So, have + // a space following a \n be allowed to collapse. /* non-optimized implementation of "(a) A sequence of segment breaks * and other white space between two Chinese, Japanese, or Yi characters @@ -1171,6 +1253,13 @@ class LVFormatter { */ #if (USE_LIBUNIBREAK==1) + if ( nowrap ) { + // If "white-space: nowrap", we flag everything but the last char + // (So, for a 1 char long text node, no flag.) 
+ if ( k < len-1 ) { + m_flags[pos] |= LCHAR_DEPRECATED_WRAP_AFTER; + } + } lChar16 ch = m_text[pos]; if ( src->lang_cfg->hasLBCharSubFunc() ) { // Lang specific function may want to substitute char (for @@ -1268,6 +1357,7 @@ class LVFormatter { pos++; } } + prev_src = src; } // Also flag as collapsed all spaces at the end of text pos = pos-1; // get back last pos++ @@ -3356,7 +3446,7 @@ class LVFormatter { } int w0 = pos>0 ? m_widths[pos-1] : 0; // measured cumulative width at start of this line int lastNormalWrap = -1; - int lastDeprecatedWrap = -1; // Not updated (so, not used) when USE_LIBUNIBREAK + int lastDeprecatedWrap = -1; // Different usage whether USE_LIBUNIBREAK or not (see below) int lastHyphWrap = -1; int lastMandatoryWrap = -1; int spaceReduceWidth = 0; // max total line width which can be reduced by narrowing of spaces @@ -3422,8 +3512,7 @@ class LVFormatter { if (!seen_non_collapsed_space) { if (flags & LCHAR_IS_COLLAPSED_SPACE) continue; - else - seen_non_collapsed_space = true; + seen_non_collapsed_space = true; } if ( !seen_first_rendered_char ) { seen_first_rendered_char = true; @@ -3486,7 +3575,15 @@ class LVFormatter { #if (USE_LIBUNIBREAK==1) // Note: with libunibreak, we can't assume anymore that LCHAR_ALLOW_WRAP_AFTER is synonym to IS_SPACE. if (flags & LCHAR_ALLOW_WRAP_AFTER) { - lastNormalWrap = i; + if (flags & LCHAR_DEPRECATED_WRAP_AFTER) { + // Allowed by libunibreak, but prevented by "white-space: nowrap" on + // this text node parent. Store this opportunity as lastDeprecatedWrap, + // that we will use only if no lastNormalWrap found. + lastDeprecatedWrap = i; + } + else { + lastNormalWrap = i; + } } #else // A space or a CJK ideograph make a normal allowed wrap @@ -3535,11 +3632,16 @@ class LVFormatter { // Note that a wrap can happen AFTER a '-' (that has CH_PROP_AVOID_WRAP_AFTER) // when lastDeprecatedWrap is prefered below. 
} + else if ( flags & LCHAR_DEPRECATED_WRAP_AFTER ) { + // Different meaning than when USE_LIBUNIBREAK: it is set + // by lastFont->measureText() on some hyphens. + // (To keep this legacy behaviour and not complexify things, we don't + // ensure "white-space: nowrap" when not using libunibreak.) + lastDeprecatedWrap = i; // Hyphens make a less priority wrap + } #endif // not USE_LIBUNIBREAK==1 - else if ( i==m_length-1 ) // Last char + if ( i==m_length-1 ) // Last char always provides a normal wrap lastNormalWrap = i; - else if ( flags & LCHAR_DEPRECATED_WRAP_AFTER ) // does not happen when USE_LIBUNIBREAK - lastDeprecatedWrap = i; // Hyphens make a less priority wrap if ( !grabbedExceedingSpace && m_pbuffer->min_space_condensing_percent != 100 && i < m_length-1 && @@ -3563,16 +3665,30 @@ class LVFormatter { i = pos + 1; // allow at least one character to be shown on line int wordpos = i-1; // Last char which fits: hyphenation does not need to check further + #if (USE_LIBUNIBREAK==1) + // If no normal wrap found, and if we have a deprecated wrap (a normal wrap + // as determined by libunibreak, but prevented by "white-space: nowrap", + // it's because the line has no wrap opportunity outside nodes with + // "white-space: nowrap". + // We need to wrap, and it's best to do so at a regular opportunity rather + // than at some arbitrary point: do as it there were no "nowrap". + if ( lastNormalWrap < 0 && lastDeprecatedWrap > 0 ) { + lastNormalWrap = lastDeprecatedWrap; + } + #endif int normalWrapWidth = lastNormalWrap > 0 ? x + m_widths[lastNormalWrap]-w0 : 0; - int deprecatedWrapWidth = lastDeprecatedWrap > 0 ? x + m_widths[lastDeprecatedWrap]-w0 : 0; int unusedSpace = maxWidth - normalWrapWidth; if ( visualAlignmentEnabled ) { unusedSpace -= 2*visualAlignmentWidth; } int unusedPercent = maxWidth > 0 ? 
unusedSpace * 100 / maxWidth : 0; - if ( deprecatedWrapWidth > normalWrapWidth && unusedPercent > 3 ) { // only 3% - lastNormalWrap = lastDeprecatedWrap; - } + #if (USE_LIBUNIBREAK!=1) + // (Different usage of deprecatedWrap than above) + int deprecatedWrapWidth = lastDeprecatedWrap > 0 ? x + m_widths[lastDeprecatedWrap]-w0 : 0; + if ( deprecatedWrapWidth > normalWrapWidth && unusedPercent > 3 ) { // only 3% + lastNormalWrap = lastDeprecatedWrap; + } + #endif // If, with normal wrapping, more than 5% of the line would not be used, // try to find a word (from where we stopped back to lastNormalWrap) to @@ -3598,7 +3714,7 @@ class LVFormatter { printf("hyph loop #%d checking: %s\n", debug_loop_num, LCSTR(lString16(m_text+wordpos_min, i-wordpos_min+1))); #endif - if ( !(m_srcs[wordpos]->flags & LTEXT_HYPHENATE) ) { + if ( !(m_srcs[wordpos]->flags & LTEXT_HYPHENATE) || (m_srcs[wordpos]->flags & LTEXT_FLAG_NOWRAP) ) { // The word at worpos can't be hyphenated, but it might be // allowed on some earlier word in another text node. // As this is a rare situation (they are mostly all hyphenat'able, diff --git a/crengine/src/lvtinydom.cpp b/crengine/src/lvtinydom.cpp index 9dcc80272..758205c47 100644 --- a/crengine/src/lvtinydom.cpp +++ b/crengine/src/lvtinydom.cpp @@ -4832,8 +4832,11 @@ ldomElementWriter::ldomElementWriter(ldomDocument * document, lUInt16 nsid, lUIn //logfile << "{c"; _typeDef = _document->getElementTypePtr( id ); _flags = 0; - if ( (_typeDef && _typeDef->white_space==css_ws_pre) || (_parent && _parent->getFlags()&TXTFLG_PRE) ) - _flags |= TXTFLG_PRE; + if ( (_typeDef && _typeDef->white_space >= css_ws_pre_line) || (_parent && _parent->getFlags()&TXTFLG_PRE) ) + _flags |= TXTFLG_PRE; // Parse as PRE: pre-line, pre, pre-wrap and break-spaces + // This will be updated in ldomElementWriter::onBodyEnter() after we have + // set styles to this node, so we'll get the real white_space value to use. 
+ _isSection = (id==el_section); // Default (for elements not specified in fb2def.h) is to allow text @@ -4980,10 +4983,13 @@ void ldomElementWriter::onBodyEnter() // crFatalError(); // } _isBlock = isBlockNode(_element); - // If initNodeStyle() has set "white-space: pre", update _flags - if ( _element->getStyle()->white_space == css_ws_pre) { + // If initNodeStyle() has set "white-space: pre" or alike, update _flags + if ( _element->getStyle()->white_space >= css_ws_pre_line) { _flags |= TXTFLG_PRE; } + else { + _flags &= ~TXTFLG_PRE; + } } else { } if ( _isSection ) { @@ -5026,7 +5032,8 @@ void ldomNode::autoboxChildren( int startIndex, int endIndex, bool handleFloatin if ( !isElement() ) return; css_style_ref_t style = getStyle(); - bool pre = ( style->white_space==css_ws_pre ); + bool pre = ( style->white_space >= css_ws_pre_line ); + // (css_ws_pre_line might need special care?) int firstNonEmpty = startIndex; int lastNonEmpty = endIndex; @@ -5179,8 +5186,8 @@ bool ldomNode::cleanIfOnlyEmptyTextInline( bool handleFloating ) if ( !isElement() ) return false; css_style_ref_t style = getStyle(); - if ( style->white_space==css_ws_pre ) - return false; // Don't mess with PRE + if ( style->white_space >= css_ws_pre ) + return false; // Don't mess with PRE (css_ws_pre_line might need special care?) // We return false as soon as we find something non text, or text non empty int i = getChildCount()-1; for ( ; i>=0; i-- ) { @@ -13129,14 +13136,16 @@ lUInt32 tinyNodeCollection::calcStyleHash() res = res * 31 + sh; if (!style.isNull()) { _nodeDisplayStyleHash = _nodeDisplayStyleHash * 31 + style.get()->display; - // Also account in this hash if this node is "white_space: pre" - // If white_space change from/to "pre" to/from any other value, + // Also account in this hash if this node is "white_space: pre" or alike. 
+ // If white_space changes from/to "pre"-like to/from "normal"-like, // the document will need to be reloaded so that the HTML text parts // are parsed according the the PRE/not-PRE rules - if (style.get()->white_space == css_ws_pre) _nodeDisplayStyleHash += 29; + if (style.get()->white_space >= css_ws_pre_line) + _nodeDisplayStyleHash += 29; // Also account for style->float_, as it should create/remove new floatBox // elements wrapping floats when toggling BLOCK_RENDERING_G(ENHANCED) - if (style.get()->float_ > css_f_none) _nodeDisplayStyleHash += 123; + if (style.get()->float_ > css_f_none) + _nodeDisplayStyleHash += 123; } //printf("element %d %d style hash: %x\n", i, j, sh); LVFontRef font = buf[j].getFont(); From c29fea4cfd367fe6415e05056b7d05c1af5c0bdd Mon Sep 17 00:00:00 2001 From: poire-z Date: Thu, 4 Jun 2020 16:45:11 +0200 Subject: [PATCH 08/11] Text: fix standalone BR not making an empty line (rework) Rework 89af0637: we might want our added content to get space collapsing. We have to provide LTEXT_FLAG_PREFORMATTED when we don't want that. --- crengine/src/lvrend.cpp | 12 +++++++++--- crengine/src/lvtextfm.cpp | 9 +++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/crengine/src/lvrend.cpp b/crengine/src/lvrend.cpp index e2e681b0f..33a928575 100755 --- a/crengine/src/lvrend.cpp +++ b/crengine/src/lvrend.cpp @@ -3154,7 +3154,9 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce lUInt32 cl = style->color.type!=css_val_color ? 0xFFFFFFFF : style->color.value; lUInt32 bgcl = style->background_color.type!=css_val_color ? 
0xFFFFFFFF : style->background_color.value; lChar16 delimiter[] = {UNICODE_NO_BREAK_SPACE, UNICODE_NO_BREAK_SPACE}; //160 - txform->AddSourceLine( delimiter, sizeof(delimiter)/sizeof(lChar16), cl, bgcl, font, lang_cfg, LTEXT_FLAG_OWNTEXT | LTEXT_RUNIN_FLAG, line_h, valign_dy, 0, NULL ); + txform->AddSourceLine( delimiter, sizeof(delimiter)/sizeof(lChar16), cl, bgcl, font, lang_cfg, + LTEXT_RUNIN_FLAG | LTEXT_FLAG_PREFORMATTED | LTEXT_FLAG_OWNTEXT, + line_h, valign_dy, 0, NULL ); flags &= ~LTEXT_RUNIN_FLAG; } } @@ -3191,7 +3193,9 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce LVFont * font = enode->getFont().get(); lUInt32 cl = style->color.type!=css_val_color ? 0xFFFFFFFF : style->color.value; lUInt32 bgcl = style->background_color.type!=css_val_color ? 0xFFFFFFFF : style->background_color.value; - txform->AddSourceLine( L" ", 1, cl, bgcl, font, lang_cfg, baseflags | LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L" ", 1, cl, bgcl, font, lang_cfg, + baseflags | LTEXT_FLAG_PREFORMATTED | LTEXT_FLAG_OWNTEXT, + line_h, valign_dy); // baseflags &= ~LTEXT_FLAG_NEWLINE; // clear newline flag // No need to clear the flag, as we set it just below // (any LTEXT_ALIGN_* set implies LTEXT_FLAG_NEWLINE) @@ -3251,7 +3255,9 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce LVFont * font = enode->getFont().get(); lUInt32 cl = style->color.type!=css_val_color ? 0xFFFFFFFF : style->color.value; lUInt32 bgcl = style->background_color.type!=css_val_color ? 
0xFFFFFFFF : style->background_color.value; - txform->AddSourceLine( L" ", 1, cl, bgcl, font, lang_cfg, baseflags|LTEXT_SRC_IS_CLEAR_LAST|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L" ", 1, cl, bgcl, font, lang_cfg, + baseflags | LTEXT_SRC_IS_CLEAR_LAST | LTEXT_FLAG_PREFORMATTED | LTEXT_FLAG_OWNTEXT, + line_h, valign_dy); } } else if ( enode->isText() ) { diff --git a/crengine/src/lvtextfm.cpp b/crengine/src/lvtextfm.cpp index ae4919d0c..deff77d53 100755 --- a/crengine/src/lvtextfm.cpp +++ b/crengine/src/lvtextfm.cpp @@ -1175,12 +1175,13 @@ class LVFormatter { lChar16 c = m_text[pos]; bool is_space = (c == ' '); - if ( is_space && prev_was_space && !preformatted && src->object ) { + if ( is_space && prev_was_space && !preformatted ) { // On non-pre paragraphs, flag spaces following a space // so we can discard them later. - // (But only if the space is from a document text node (it then - // has a non-NULL ->object), to keep those we added for empty - // lines or identation with 'txform->AddSourceLine(L" "...)'.) + // Note: for the empty lines or indentation we might add + // with 'txform->AddSourceLine(L" "...)', we need to + // provide LTEXT_FLAG_PREFORMATTED if we don't want them + // to be collapsed. m_flags[pos] = LCHAR_IS_COLLAPSED_SPACE | LCHAR_ALLOW_WRAP_AFTER; // m_text[pos] = '_'; // uncomment when debugging // (We can replace the char to see it in printf() (m_text is not the From 7e5fa24c341eec34dfcf6ea3cc2da56de460c417 Mon Sep 17 00:00:00 2001 From: poire-z Date: Thu, 4 Jun 2020 16:45:13 +0200 Subject: [PATCH 09/11] CSS: support for pseudo elements ::before & ::after Handle parsing of '::before', '::after' (CSS3), ':before' and ':after' (CSS2) in selectors. Properly check if they should be generated or not, and if yes, insert a new internal element in the DOM: pseudoElem. Handle needed added CSS property: 'content:'. 
Parse original values and store a pre-computed string in style->content, ready to be used to get the final generated content for a node. Supports string, attributes, open/close-quote. Replaces specific handling in the code with: q::before { content: open-quote; } q::after { content: close-quote; } --- cr3gui/data/epub.css | 4 + crengine/include/fb2def.h | 12 + crengine/include/lvrend.h | 2 +- crengine/include/lvstsheet.h | 36 ++- crengine/include/lvstyles.h | 27 ++- crengine/include/lvtinydom.h | 17 +- crengine/src/lvrend.cpp | 150 ++++++++---- crengine/src/lvstsheet.cpp | 436 ++++++++++++++++++++++++++++++++++- crengine/src/lvstyles.cpp | 6 +- crengine/src/lvtinydom.cpp | 175 ++++++++++++-- 10 files changed, 767 insertions(+), 98 deletions(-) diff --git a/cr3gui/data/epub.css b/cr3gui/data/epub.css index b47588315..d456526a9 100644 --- a/cr3gui/data/epub.css +++ b/cr3gui/data/epub.css @@ -146,6 +146,10 @@ u, ins { text-decoration: underline; } del, s, strike { text-decoration: line-through; } a { text-decoration: underline; color: gray; } +/* No support for the "quotes:" property, these will use default quote chars */ +q::before { content: open-quote; } +q::after { content: close-quote; } + nobr { display: inline; hyphens: none; diff --git a/crengine/include/fb2def.h b/crengine/include/fb2def.h index 0105a6eb2..d3146555d 100644 --- a/crengine/include/fb2def.h +++ b/crengine/include/fb2def.h @@ -29,6 +29,8 @@ //===================================================== XS_BEGIN_TAGS +// Boxing elements (inserted in the DOM tree between original parent and children): +// // Internal element for block wrapping inline elements (without a proper parent // block container) among proper block siblings (would be better named "blockBox") XS_TAG1T( autoBoxing ) @@ -38,6 +40,14 @@ XS_TAG1T( tabularBox ) XS_TAG1T( floatBox ) // Internal element for inline-block and inline-table rendering XS_TAG1I( inlineBox ) + +// Internal element created for CSS pseudo elements ::before and 
::after : +// - defaults to "display: none", but will be set to "inline" when style is applied +// - it doesn't have a text node child, the content will be fetched from +// its style->content when rendering and drawing text. +// It does not box anything and has no child, so it's not considered a boxing node. +XS_TAG1D( pseudoElem, false, css_d_none, css_ws_normal ) + // Internal element for EPUB, containing each individual HTML file XS_TAG1( DocFragment ) @@ -256,6 +266,8 @@ XS_ATTR( role ) XS_ATTR( dir ) XS_ATTR( lang ) XS_ATTR( recindex ) // used with mobi images +XS_ATTR( Before ) // for pseudoElem internal element +XS_ATTR( After ) // for pseudoElem internal element // Other classic attributes present in html5.css XS_ATTR2( accept_charset, "accept-charset" ) XS_ATTR( alt ) diff --git a/crengine/include/lvrend.h b/crengine/include/lvrend.h index ad4c5fd49..22d46b9fe 100644 --- a/crengine/include/lvrend.h +++ b/crengine/include/lvrend.h @@ -147,7 +147,7 @@ void DrawDocument( LVDrawBuf & drawbuf, ldomNode * node, int x0, int y0, int dx, // full function for recursive use: void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direction, bool ignorePadding, int rendFlags, int &curMaxWidth, int &curWordWidth, bool &collapseNextSpace, int &lastSpaceWidth, - int indent, TextLangCfg * lang_cfg, bool isStartNode=false); + int indent, TextLangCfg * lang_cfg, bool processNodeAsText=false, bool isStartNode=false); // simpler function for first call: void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direction=REND_DIRECTION_UNSET, bool ignorePadding=false, int rendFlags=0); diff --git a/crengine/include/lvstsheet.h b/crengine/include/lvstsheet.h index ec0bd888b..e5cba4725 100644 --- a/crengine/include/lvstsheet.h +++ b/crengine/include/lvstsheet.h @@ -130,6 +130,20 @@ static const char * css_pseudo_classes[] = NULL }; +// https://developer.mozilla.org/en-US/docs/Web/CSS/Pseudo-elements +enum LVCssSelectorPseudoElement +{ + 
csspe_before = 1, // ::before + csspe_after = 2, // ::after +}; + +static const char * css_pseudo_elements[] = +{ + "before", + "after", + NULL +}; + enum LVCssSelectorRuleType { cssrt_universal, // * @@ -198,22 +212,33 @@ class LVCssSelector { lUInt16 _id; LVCssDeclRef _decl; int _specificity; + int _pseudo_elem; // from enum LVCssSelectorPseudoElement, or 0 LVCssSelector * _next; LVCssSelectorRule * _rules; void insertRuleStart( LVCssSelectorRule * rule ); void insertRuleAfterStart( LVCssSelectorRule * rule ); public: LVCssSelector( LVCssSelector & v ); - LVCssSelector() : _id(0), _specificity(0), _next(NULL), _rules(NULL) { } - LVCssSelector(int specificity) : _id(0), _specificity(specificity), _next(NULL), _rules(NULL) { } + LVCssSelector() : _id(0), _specificity(0), _pseudo_elem(0), _next(NULL), _rules(NULL) { } + LVCssSelector(int specificity) : _id(0), _specificity(specificity), _pseudo_elem(0), _next(NULL), _rules(NULL) { } ~LVCssSelector() { if (_next) delete _next; if (_rules) delete _rules; } bool parse( const char * &str, lxmlDocBase * doc ); lUInt16 getElementNameId() { return _id; } bool check( const ldomNode * node ) const; + void applyToPseudoElement( const ldomNode * node, css_style_rec_t * style ) const; void apply( const ldomNode * node, css_style_rec_t * style ) const { - if (check( node )) - _decl->apply(style); + if (check( node )) { + if ( _pseudo_elem > 0 ) { + applyToPseudoElement(node, style); + } + else { + _decl->apply(style); + } + // style->flags |= STYLE_REC_FLAG_MATCHED; + // Done in applyToPseudoElement() as currently only needed there. + // Uncomment if more generic usage needed. 
+ } } void setDeclaration( LVCssDeclRef decl ) { _decl = decl; } int getSpecificity() { return _specificity; } @@ -305,6 +330,9 @@ class LVStyleSheet { /// parse color value like #334455, #345 or red bool parse_color_value( const char * & str, css_length_t & value ); +/// get computed value for a node from its parsed CSS "content:" value +lString16 get_applied_content_property( ldomNode * node ); + /// extract @import filename from beginning of CSS bool LVProcessStyleSheetImport( const char * &str, lString8 & import_file ); /// load stylesheet from file, with processing of import diff --git a/crengine/include/lvstyles.h b/crengine/include/lvstyles.h index b2e9e7837..49ede5bf3 100644 --- a/crengine/include/lvstyles.h +++ b/crengine/include/lvstyles.h @@ -82,20 +82,27 @@ enum css_style_rec_important_bit { imp_bit_float = 1ULL << 55, imp_bit_clear = 1ULL << 56, imp_bit_direction = 1ULL << 57, - imp_bit_cr_hint = 1ULL << 58 + imp_bit_content = 1ULL << 58, + imp_bit_cr_hint = 1ULL << 59 }; +// Style handling flags +#define STYLE_REC_FLAG_MATCHED 0x01 // This style has had some stylesheet declaration matched and applied. + // Currently only used for a pseudo element style, + // see LVCssSelector::apply() if more generic usage needed. + /** \brief Element style record. Contains set of style properties. 
*/ -typedef struct css_style_rec_tag { +typedef struct css_style_rec_tag css_style_rec_t; +struct css_style_rec_tag { int refCount; // for reference counting lUInt32 hash; // cache calculated hash value here lUInt64 important; // bitmap for !important (used only by LVCssDeclaration) - // we have currently below 59 css properties - // lvstsheet knows about 81, which are mapped to these 59 + // we have currently below 60 css properties + // lvstsheet knows about 82, which are mapped to these 60 // update bits above if you add new properties below lUInt64 importance; // bitmap for important bit's importance/origin // (allows for 2 level of !important importance) @@ -144,7 +151,14 @@ typedef struct css_style_rec_tag { css_float_t float_; // "float" is a C++ keyword... css_clear_t clear; css_direction_t direction; + lString16 content; css_cr_hint_t cr_hint; + // The following should only be used when applying stylesheets while in lvrend.cpp setNodeStyle(), + // and cleaned up there, before the style is cached and shared. They are not serialized. + lInt8 flags; // bitmap of STYLE_REC_FLAG_* + css_style_rec_t * pseudo_elem_before_style; + css_style_rec_t * pseudo_elem_after_style; + css_style_rec_tag() : refCount(0) , hash(0) @@ -189,6 +203,9 @@ typedef struct css_style_rec_tag { , clear(css_c_none) , direction(css_dir_inherit) , cr_hint(css_cr_hint_none) + , flags(0) + , pseudo_elem_before_style(NULL) + , pseudo_elem_after_style(NULL) { // css_length_t fields are initialized by css_length_tag() // to (css_val_screen_px, 0) @@ -233,7 +250,7 @@ typedef struct css_style_rec_tag { if (is_important == 0x3) importance |= bit; } }; -} css_style_rec_t; +}; /// style record reference type typedef LVFastRef< css_style_rec_t > css_style_ref_t; diff --git a/crengine/include/lvtinydom.h b/crengine/include/lvtinydom.h index 33dcbca77..a11182c83 100755 --- a/crengine/include/lvtinydom.h +++ b/crengine/include/lvtinydom.h @@ -851,6 +851,10 @@ class ldomNode // the wrapping element. 
ldomNode * boxWrapChildren( int startIndex, int endIndex, lUInt16 elementId ); + // Ensure this node has a ::before/::after pseudo element as + // child, creating it if needed and possible + void ensurePseudoElement( bool is_before ); + /// if stylesheet file name is set, and file is found, set stylesheet to its value bool applyNodeStylesheet(); @@ -1010,7 +1014,7 @@ class ldomNode void setRendMethod( lvdom_element_render_method ); #if BUILD_LITE!=1 /// returns element style record - css_style_ref_t getStyle(); + css_style_ref_t getStyle() const; /// returns element font font_ref_t getFont(); /// sets element font @@ -1071,17 +1075,17 @@ class ldomNode /// for display:list-item node, get marker bool getNodeListMarker( int & counterValue, lString16 & marker, int & markerWidth ); /// is node a floating floatBox - bool isFloatingBox(); + bool isFloatingBox() const; /// is node an inlineBox that has not been re-inlined by having /// its child no more inline-block/inline-table - bool isBoxingInlineBox(); + bool isBoxingInlineBox() const; /// is node an inlineBox that wraps a bogus embedded block (not inline-block/inline-table) /// can be called with inline_box_checks_done=true when isBoxingInlineBox() has already /// been called to avoid rechecking what is known - bool isEmbeddedBlockBoxingInlineBox(bool inline_box_checks_done=false); + bool isEmbeddedBlockBoxingInlineBox(bool inline_box_checks_done=false) const; - /// is node any of our internal boxing element - bool isBoxingNode(); + /// is node any of our internal boxing element (or, optionally, our pseudoElem) + bool isBoxingNode( bool orPseudoElem=false ) const; /// return real (as in the original HTML) parent/siblings by skipping any internal /// boxing element up or down (returns NULL when no more sibling) @@ -2556,6 +2560,7 @@ class ldomElementWriter bool _isSection; bool _stylesheetIsSet; bool _bodyEnterCalled; + int _pseudoElementAfterChildIndex; lUInt32 _flags; lUInt32 getFlags(); void updateTocItem(); diff 
--git a/crengine/src/lvrend.cpp b/crengine/src/lvrend.cpp index 33a928575..a8392ccc3 100755 --- a/crengine/src/lvrend.cpp +++ b/crengine/src/lvrend.cpp @@ -2962,7 +2962,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce break; } } - // These might have no effect, but let's explicitely dropped them. + // These might have no effect, but let's explicitely drop them. valign_dy = 0; indent = 0; // Note: a space just before or just after (because of a newline in @@ -3000,14 +3000,14 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce // Some elements add some generated content lUInt16 nodeElementId = enode->getNodeId(); // Don't handle dir= for the erm_final (

hasAttribute( attr_dir ) && rm != erm_final && rm != erm_table_caption && rm != erm_list_item; bool addGeneratedContent = hasDirAttribute || nodeElementId == el_bdi || nodeElementId == el_bdo || - nodeElementId == el_q; + nodeElementId == el_pseudoElem; bool closeWithPDI = false; bool closeWithPDF = false; bool closeWithPDFPDI = false; @@ -3018,20 +3018,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce LVFont * font = enode->getFont().get(); lUInt32 cl = style->color.type!=css_val_color ? 0xFFFFFFFF : style->color.value; lUInt32 bgcl = style->background_color.type!=css_val_color ? 0xFFFFFFFF : style->background_color.value; - if ( nodeElementId == el_q ) { - // Add default quoting opening char - // We do not support showing a different char for multiple embedded , - // and neither the way to specify this with CSS, ie: - // q::before { content: open-quote; } - // :root { quotes: '\201c' '\201d' '\2018' '\2019'; } - // Note: this specific char seem to not be mirrored (when using HarfBuzz) when - // added to some RTL arabic text. But it appears that way with Firefox too! - // But if we use another char (0x00AB / 0x00BB), it gets mirrored correctly. - // Might be that HarfBuzz first substitute it with arabic quotes (which happen - // to look inverted), and then mirror that? - txform->AddSourceLine( L"\x201C", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); - flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag - } + // The following is needed for fribidi to do the right thing when the content creator // has provided hints to explicite ambiguous cases. // and are HTML5 tags allowing to inform or override the bidi algorithm. @@ -3052,16 +3039,16 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce // leaving => PDF PDI // but it then doesn't have the intended effect (fribidi bug or limitation?) 
if ( dir.compare("rtl") == 0 ) { - // txform->AddSourceLine( L"\x2068\x202E", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + // txform->AddSourceLine( L"\x2068\x202E", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, indent); // closeWithPDFPDI = true; - txform->AddSourceLine( L"\x202E", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x202E", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, indent); closeWithPDF = true; flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } else if ( dir.compare("ltr") == 0 ) { - // txform->AddSourceLine( L"\x2068\x202D", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + // txform->AddSourceLine( L"\x2068\x202D", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, indent); // closeWithPDFPDI = true; - txform->AddSourceLine( L"\x202D", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x202D", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, indent); closeWithPDF = true; flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } @@ -3074,17 +3061,17 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce // dir=auto => FSI U+2068 FIRST STRONG ISOLATE // leaving => PDI U+2069 POP DIRECTIONAL ISOLATE if ( dir.compare("rtl") == 0 ) { - txform->AddSourceLine( L"\x2067", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x2067", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, indent); closeWithPDI = true; flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } else if ( dir.compare("ltr") == 0 ) { - txform->AddSourceLine( L"\x2066", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); 
+ txform->AddSourceLine( L"\x2066", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, indent); closeWithPDI = true; flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } else if ( nodeElementId == el_bdi || dir.compare("auto") == 0 ) { - txform->AddSourceLine( L"\x2068", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x2068", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, indent); closeWithPDI = true; flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } @@ -3104,6 +3091,19 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce // be involved for drawing ruby), but lvtextfm could deal with these // itself (by ignoring them in measurement, going back the previous // advance, increasing the line height, drawing above...) + + // BiDi stuff had to be outputed first, before any pseudo element + // (if ..., the added quote (first child pseudo element) + // should be inside the RTL bidi isolation. + if ( nodeElementId == el_pseudoElem ) { + lString16 content = get_applied_content_property(enode); + if ( !content.empty() ) { + int em = font->getSize(); + int letter_spacing = lengthToPx(style->letter_spacing, em, em); + txform->AddSourceLine( content.c_str(), content.length(), cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, indent, NULL, 0, letter_spacing); + flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag + } + } } // is_link_start is given to inner elements (to flag the first @@ -3124,22 +3124,17 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce LVFont * font = enode->getFont().get(); lUInt32 cl = style->color.type!=css_val_color ? 0xFFFFFFFF : style->color.value; lUInt32 bgcl = style->background_color.type!=css_val_color ? 
0xFFFFFFFF : style->background_color.value; - if ( nodeElementId == el_q ) { - // Add default quoting closing char - txform->AddSourceLine( L"\x201D", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); - flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag - } // See comment above: these are the closing counterpart if ( closeWithPDI ) { - txform->AddSourceLine( L"\x2069", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x2069", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, indent); flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } else if ( closeWithPDFPDI ) { - txform->AddSourceLine( L"\x202C\x2069", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x202C\x2069", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, indent); flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } else if ( closeWithPDF ) { - txform->AddSourceLine( L"\x202C", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x202C", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, indent); flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } } @@ -3468,6 +3463,7 @@ void copystyle( css_style_ref_t source, css_style_ref_t dest ) dest->float_ = source->float_; dest->clear = source->clear; dest->direction = source->direction; + dest->content = source->content ; dest->cr_hint = source->cr_hint; } @@ -9033,6 +9029,33 @@ void setNodeStyle( ldomNode * enode, css_style_ref_t parent_style, LVFontRef par if ( spread_background_color ) spreadParent( pstyle->background_color, parent_style->background_color, true ); + // See if applying styles requires pseudo element before/after + bool requires_pseudo_element_before = false; + bool 
requires_pseudo_element_after = false; + if ( pstyle->pseudo_elem_before_style ) { + if ( pstyle->pseudo_elem_before_style->display != css_d_none + && pstyle->pseudo_elem_before_style->content.length() > 0 + && pstyle->pseudo_elem_before_style->content[0] != L'X' ) { + // Not "display: none" and with "content:" different than "none": + // this pseudo element can be generated + requires_pseudo_element_before = true; + } + delete pstyle->pseudo_elem_before_style; + pstyle->pseudo_elem_before_style = NULL; + } + if ( pstyle->pseudo_elem_after_style ) { + if ( pstyle->pseudo_elem_after_style->display != css_d_none + && pstyle->pseudo_elem_after_style->content.length() > 0 + && pstyle->pseudo_elem_after_style->content[0] != L'X' ) { + // Not "display: none" and with "content:" different than "none": + // this pseudo element can be generated + requires_pseudo_element_after = true; + } + delete pstyle->pseudo_elem_after_style; + pstyle->pseudo_elem_after_style = NULL; + } + pstyle->flags = 0; // cleanup, before setStyle() adds it to cache + // set calculated style //enode->getDocument()->cacheStyle( style ); enode->setStyle( style ); @@ -9043,6 +9066,13 @@ void setNodeStyle( ldomNode * enode, css_style_ref_t parent_style, LVFontRef par // set font enode->initNodeFont(); + + // Now that this node is fully styled, ensure these pseudo elements + // are there as children, creating them if needed and possible + if ( requires_pseudo_element_before ) + enode->ensurePseudoElement(true); + if ( requires_pseudo_element_after ) + enode->ensurePseudoElement(false); } // Uncomment for debugging getRenderedWidths(): @@ -9062,12 +9092,12 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct bool isStartNode = true; // we are starting measurement on that node // Start measurements and recursions: getRenderedWidths(node, maxWidth, minWidth, direction, ignoreMargin, rendFlags, - curMaxWidth, curWordWidth, collapseNextSpace, lastSpaceWidth, indent, NULL, 
isStartNode); + curMaxWidth, curWordWidth, collapseNextSpace, lastSpaceWidth, indent, NULL, false, isStartNode); } void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direction, bool ignoreMargin, int rendFlags, int &curMaxWidth, int &curWordWidth, bool &collapseNextSpace, int &lastSpaceWidth, - int indent, TextLangCfg * lang_cfg, bool isStartNode) + int indent, TextLangCfg * lang_cfg, bool processNodeAsText, bool isStartNode) { // This does mostly what renderBlockElement, renderFinalBlock and lvtextfm.cpp // do, but only with widths and horizontal margin/border/padding and indent @@ -9079,7 +9109,7 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct // we only handle list-style-position/text-align combinations vs direction, // which have different rendering methods.) - if ( node->isElement() ) { + if ( node->isElement() && !processNodeAsText ) { int m = node->getRendMethod(); if (m == erm_invisible) return; @@ -9216,6 +9246,13 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct minWidth = _minw; return; } + if ( node->getNodeId()==el_pseudoElem ) { + // pseudoElem has no children: reprocess this same node + // with processNodeAsText=true, to process its text content. + getRenderedWidths(node, maxWidth, minWidth, direction, false, rendFlags, + curMaxWidth, curWordWidth, collapseNextSpace, lastSpaceWidth, indent, lang_cfg, true); + return; + } // Contains only other inline or text nodes: // add to our passed by ref *Width for (int i = 0; i < node->getChildCount(); i++) { @@ -9342,6 +9379,12 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct curMaxWidth, curWordWidth, collapseNextSpace, lastSpaceWidth, indent, lang_cfg); // A
can happen deep among our children, so we deal with that when erm_inline above } + if ( node->getNodeId() == el_pseudoElem ) { + // erm_final pseudoElem (which has no children): reprocess this same + // node with processNodeAsText=true, to process its text content. + getRenderedWidths(node, _maxWidth, _minWidth, direction, false, rendFlags, + curMaxWidth, curWordWidth, collapseNextSpace, lastSpaceWidth, indent, lang_cfg, true); + } if (lastSpaceWidth) curMaxWidth -= lastSpaceWidth; // Add current word as we're leaving a block node, so it can't be followed by some other text @@ -9478,13 +9521,25 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct if (_minWidth > minWidth) minWidth = _minWidth; } - else if (node->isText() ) { - lString16 nodeText = node->getText(); + else { // text or pseudoElem + lString16 text; int start = 0; - int len = nodeText.length(); + int len = 0; + ldomNode * parent; + if ( node->isText() ) { + text = node->getText(); + parent = node->getParentNode(); + } + else if ( node->getNodeId() == el_pseudoElem ) { + text = get_applied_content_property(node); + parent = node; // this pseudoElem node carries the font and style of the text + if ( isStartNode ) { + lang_cfg = TextLangMan::getTextLangCfg( node ); // Fetch it from node or its parents + } + } + len = text.length(); if ( len == 0 ) return; - ldomNode *parent = node->getParentNode(); // letter-spacing LVFont * font = parent->getFont().get(); int em = font->getSize(); @@ -9493,16 +9548,16 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct // text-transform switch (parent_style->text_transform) { case css_tt_uppercase: - nodeText.uppercase(); + text.uppercase(); break; case css_tt_lowercase: - nodeText.lowercase(); + text.lowercase(); break; case css_tt_capitalize: - nodeText.capitalize(); + text.capitalize(); break; case css_tt_full_width: - // nodeText.fullWidthChars(); // disabled for now (may change CJK rendering) + // 
text.fullWidthChars(); // disabled for now (may change CJK rendering) break; case css_tt_none: case css_tt_inherit: @@ -9515,10 +9570,10 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct bool pre = parent_style->white_space >= css_ws_pre; int space_width_scale_percent = pre ? 100 : parent->getDocument()->getSpaceWidthScalePercent(); // measure text - const lChar16 * txt = nodeText.c_str(); + const lChar16 * txt = text.c_str(); #ifdef DEBUG_GETRENDEREDWIDTHS - printf("GRW text: |%s|\n", UnicodeToLocal(nodeText).c_str()); - printf("GRW text: (dumb text size=%d)\n", node->getParentNode()->getFont()->getTextWidth(txt, len)); + printf("GRW text: |%s|\n", UnicodeToLocal(text).c_str()); + printf("GRW text: (dumb text size=%d)\n", font->getTextWidth(txt, len)); #endif #define MAX_TEXT_CHUNK_SIZE 4096 static lUInt16 widths[MAX_TEXT_CHUNK_SIZE+1]; @@ -9537,12 +9592,11 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct // line breaking rules between contiguous text nodes (but it's a bit // complicated to pass this lbCtx across calls...) 
struct LineBreakContext lbCtx; - lb_init_break_context(&lbCtx, 0x0020, NULL); + lb_init_break_context(&lbCtx, 0x200D, NULL); // ZERO WIDTH JOINER lbCtx.lbpLang = lang_cfg->getLBProps(); lb_process_next_char(&lbCtx, (utf32_t)(*txt)); #endif while (true) { - LVFont * font = node->getParentNode()->getFont().get(); int chars_measured = font->measureText( txt + start, len, diff --git a/crengine/src/lvstsheet.cpp b/crengine/src/lvstsheet.cpp index e3700c69d..06179145c 100644 --- a/crengine/src/lvstsheet.cpp +++ b/crengine/src/lvstsheet.cpp @@ -113,6 +113,7 @@ enum css_decl_code { cssd_float, cssd_clear, cssd_direction, + cssd_content, cssd_cr_ignore_if_dom_version_greater_or_equal, cssd_cr_hint, cssd_cr_only_if, @@ -205,6 +206,7 @@ static const char * css_decl_name[] = { "float", "clear", "direction", + "content", "-cr-ignore-if-dom-version-greater-or-equal", "-cr-hint", "-cr-only-if", @@ -344,6 +346,12 @@ static lUInt32 parse_important( const char *str ) // does not advance the origin static bool next_property( const char * & str ) { + // todo: + // https://www.w3.org/TR/CSS2/syndata.html#parsing-errors + // User agents must handle unexpected tokens encountered while + // parsing a declaration by reading until the end of the + // declaration, while observing the rules for matching pairs + // of (), [], {}, "", and '', and correctly handling escapes. 
while (*str && *str !=';' && *str!='}') str++; if (*str == ';') @@ -351,6 +359,21 @@ static bool next_property( const char * & str ) return skip_spaces( str ); } +static bool next_token( const char * & str ) +{ + // todo: as for next_property() + while (*str && *str !=';' && *str!='}' && *str!=' ') + str++; + if (*str == ' ') { + if ( skip_spaces( str ) ) { + if (*str && *str !=';' && *str!='}') + // Something else before next property or end of declaration + return true; + } + } + return false; +} + static bool parse_integer( const char * & str, int & value) { skip_spaces( str ); @@ -851,6 +874,277 @@ bool parse_color_value( const char * & str, css_length_t & value ) return false; } +// Parse a CSS "content:" property into an intermediate format single string. +bool parse_content_property( const char * & str, lString16 & parsed_content) +{ + // https://developer.mozilla.org/en-US/docs/Web/CSS/content + // The property may have multiple tokens: + // p::before { content: "[" attr(n) "]"; } + // content: "Qq. " attr(qq) + // content: '\201D\ In: '; + // We can meet some bogus values: content: "∙ "; + // or values we don't support: Firefox would drop the whole + // declaration, but, as we don't support all those from the + // specs, we'll just ignore the tokens we don't support. + // We parse the original content into a "parsed content" string, + // consisting of a first letter, indicating its type, and if + // some data: its length and that data. + // parsed_content may contain multiple values, in the format + // 'X' for 'none' (or 'normal', = none with pseudo elements) + // 's' + + string16 (string content) for "" + // 'a' + + string16 (attribute name) for attr() + // 'Q' for 'open-quote' + // 'q' for 'close-quote' + // 'N' for 'no-open-quote' + // 'n' for 'no-close-quote' + // 'u' for 'url()', that we don't support + // 'z' for unsupported tokens, like gradient()... + // Note: this parsing might not be super robust with + // convoluted declarations... 
+ parsed_content.clear(); + const char * orig_pos = str; + // The presence of a single 'none' or 'normal' among multiple + // values make the whole thing 'none'. + bool has_none = false; + while ( skip_spaces( str ) && *str!=';' && *str!='}' && *str!='!' ) { + if ( substr_icompare("none", str) ) { + has_none = true; + continue; // continue parsing + } + else if ( substr_icompare("normal", str) ) { + // Computes to 'none' for pseudo elements + has_none = true; + continue; // continue parsing + } + else if ( substr_icompare("open-quote", str) ) { + parsed_content << L'Q'; + continue; + } + else if ( substr_icompare("close-quote", str) ) { + parsed_content << L'q'; + continue; + } + else if ( substr_icompare("no-open-quote", str) ) { + parsed_content << L'N'; + continue; + } + else if ( substr_icompare("no-close-quote", str) ) { + parsed_content << L'n'; + continue; + } + else if ( substr_icompare("attr", str) ) { + if ( *str == '(' ) { + str++; + skip_spaces( str ); + lString8 attr8; + while ( *str && *str!=')' ) { + attr8 << *str; + str++; + } + if ( *str == ')' ) { + str++; + lString16 attr = Utf8ToUnicode(attr8); + attr.trim(); + parsed_content << L'a'; + parsed_content << lChar16(attr.length()); + parsed_content << attr; + continue; + } + // No closing ')': invalid + } + } + else if ( substr_icompare("url", str) ) { + // Unsupported for now, but parse it + if ( *str == '(' ) { + str++; + skip_spaces( str ); + lString8 url8; + while ( *str && *str!=')' ) { + url8 << *str; + str++; + } + if ( *str == ')' ) { + str++; + parsed_content << L'u'; + continue; + } + // No closing ')': invalid + } + } + else if ( *str == '"' || *str == '\'' ) { + // https://developer.mozilla.org/en-US/docs/Web/CSS/string + // https://www.w3.org/TR/CSS2/syndata.html#strings + // https://drafts.csswg.org/css-values-3/#strings + char quote_ch = *str; + str++; + lString8 str8; // quoted string content (as UTF8, like original stylesheet) + while ( *str && *str != quote_ch ) { + if ( *str == 
'\\' ) { + // https://www.w3.org/TR/CSS2/syndata.html#characters + str++; + if ( hexDigit(*str) >= 0 ) { + lUInt32 codepoint = 0; + int num_digits = 0; + while ( num_digits < 6 ) { + int v = hexDigit(*str); + if ( v >= 0 ) { + codepoint = (codepoint << 4) + v; + num_digits++; + str++; + continue; + } + // Not a hex digit + break; + } + if ( num_digits < 6 && *str == ' ' ) // skip space following a non-6-hex-digits + str++; + if ( codepoint == 0 || codepoint > 0x10FFFF ) { + // zero not allowed, and should be under max valid unicode codepoint + codepoint = 0xFFFD; // replacement character + } + // Serialize it as UTF-8 + lString16 c; + c << (lChar16)codepoint; + str8 << UnicodeToLocal(c); + } + else if ( *str == '\r' && *(str+1) == '\n' ) { + // Ignore \ at end of CRLF line + str += 2; + } + else if ( *str == '\n' ) { + // Ignore \ at end of line + str++; + } + else { + // Accept next char as is + str8 << *str; + str++; + } + } + else { + str8 << *str; + str++; + } + // todo: + // https://www.w3.org/TR/CSS2/syndata.html#parsing-errors + // "User agents must close strings upon reaching the end + // of a line (i.e., before an unescaped line feed, carriage + // return or form feed character), but then drop the construct + // (declaration or rule) in which the string was found." + } + if ( *str == quote_ch ) { + lString16 str16 = Utf8ToUnicode(str8); + parsed_content << L's'; + parsed_content << lChar16(str16.length()); + parsed_content << str16; + str++; + continue; + } + } + else { + // Not supported + parsed_content << L'z'; + next_token(str); + } + } + if ( has_none ) { + // Forget all other tokens parsed + parsed_content.clear(); + parsed_content << L'X'; + } + if (*str) // something (;, } or !important) follows + return true; + // Restore original position if we reach end of CSS string, + // as it might just be missing a ')' or closing quote: we'll + // be skipping up to next ; or }, and might manage with + // the rest of the string. 
+ str = orig_pos; + return false; +} + +/// Returns the computed value for a node from its parsed CSS "content:" value +lString16 get_applied_content_property( ldomNode * node ) { + lString16 res; + css_style_ref_t style = node->getStyle(); + lString16 parsed_content = style->content; + if ( parsed_content.empty() ) + return res; + int i = 0; + int parsed_content_len = parsed_content.length(); + while ( i < parsed_content_len ) { + lChar16 ctype = parsed_content[i++]; + if ( ctype == 's' ) { // literal string + lChar16 len = parsed_content[i++]; + res << parsed_content.substr(i, len); + i += len; + } + else if ( ctype == 'a' ) { // attribute value + lChar16 len = parsed_content[i++]; + lString16 attr_name = parsed_content.substr(i, len); + i += len; + ldomNode * attrNode = node; + if ( node->getNodeId() == el_pseudoElem ) { + // For attributes, we should pick them from the parent of the added pseudo element + attrNode = node->getUnboxedParent(); + } + if ( attrNode ) + res << attrNode->getAttributeValue(attr_name.c_str()); + } + else if ( ctype == 'u' ) { // url + // Url to image: we can't easily support that, as our + // image support needs a reference to a node, and we + // don't have a node here. + // Show a small square so one can see there's something + // that is missing, something different enough from the + // classic tofu char so we can distinguish it. + // res << 0x25FD; // WHITE MEDIUM SMALL SQUARE + res << 0x2B26; // WHITE MEDIUM DIAMOND + } + else if ( ctype == 'Q' ) { // open-quote + // Add default quoting opening char + // We do not support showing a different char for multiple nested , + // and neither the way to specify this with CSS, ie: + // q::before { content: open-quote; } + // :root { quotes: '\201c' '\201d' '\2018' '\2019'; } + // todo: have the right quote char for a language provided by lang_cfg + res << 0x201C; + // Note: this specific char seem to not be mirrored (when using HarfBuzz) when + // added to some RTL arabic text. 
But it appears that way with Firefox too! + // But if we use another char (0x00AB / 0x00BB), it gets mirrored correctly. + // Might be that HarfBuzz first substitute it with arabic quotes (which happen + // to look inverted), and then mirror that? + } + else if ( ctype == 'q' ) { // close-quote + // Add default quoting closing char + res << 0x201D; + } + else if ( ctype == 'N' ) { // no-open-quote + // (This should just increment nested quote level if we supported that) + // Nothing to output + } + else if ( ctype == 'n' ) { // no-close-quote + // (This should just decrement nested quote level if we supported that) + // Nothing to output + } + else if ( ctype == 'X' ) { // 'none' + res.clear(); // should be standalone, but let's be sure + break; + } + else if ( ctype == 'z' ) { // unsupported token + // Just ignore it, don't show anything + } + else { // unexpected + break; + } + } + if ( style->white_space < css_ws_pre_line ) { + // Remove consecutive spaces (although this might be handled well by + // lvtextfm) and '\n' - but we should keep leading and trailing spaces. 
+ res.trimDoubleSpaces(true, true, false); + } + return res; +} + static void resolve_url_path( lString8 & str, lString16 codeBase ) { // A URL (path to local or container's file) must be resolved // at parsing time, as it is related to this stylesheet file @@ -2210,6 +2504,18 @@ bool LVCssDeclaration::parse( const char * &decl, bool higher_importance, lxmlDo case cssd_direction: n = parse_name( decl, css_dir_names, -1 ); break; + case cssd_content: + { + lString16 parsed_content; + if ( parse_content_property( decl, parsed_content) ) { + buf<<(lUInt32) (cssd_content | importance | parsed_important | parse_important(decl)); + buf<<(lUInt32) parsed_content.length(); + for (int i=0; i < parsed_content.length(); i++) { + buf<<(lUInt32) parsed_content[i]; + } + } + } + break; case cssd_stop: case cssd_unknown: default: @@ -2458,7 +2764,7 @@ void LVCssDeclaration::apply( css_style_rec_t * style ) style->Apply( (css_border_style_type_t) *p++, &style->border_style_left, imp_bit_border_style_left, is_important ); break; case cssd_background_image: - { + { lString8 imagefile; imagefile.reserve(64); int l = *p++; @@ -2466,7 +2772,7 @@ void LVCssDeclaration::apply( css_style_rec_t * style ) imagefile << (lChar8)(*p++); imagefile.pack(); style->Apply( imagefile, &style->background_image, imp_bit_background_image, is_important ); - } + } break; case cssd_background_repeat: style->Apply( (css_background_repeat_value_t) *p++, &style->background_repeat, imp_bit_background_repeat, is_important ); @@ -2502,6 +2808,16 @@ void LVCssDeclaration::apply( css_style_rec_t * style ) case cssd_cr_hint: style->Apply( (css_cr_hint_t) *p++, &style->cr_hint, imp_bit_cr_hint, is_important ); break; + case cssd_content: + { + int l = *p++; + lString16 content; + content.reserve(l); + for (int i=0; iApply( content, &style->content, imp_bit_content, is_important ); + } + break; case cssd_stop: return; } @@ -2847,7 +3163,7 @@ bool LVCssSelectorRule::check( const ldomNode * & node ) } break; case 
cssrt_universal: // * - return true; + return true; // should it be: return !node->isBoxingNode(); ? case cssrt_pseudoclass: // E:pseudo-class { int nodeId; @@ -3014,8 +3330,27 @@ bool LVCssSelectorRule::checkNextRules( const ldomNode * node ) bool LVCssSelector::check( const ldomNode * node ) const { + lUInt16 nodeId = node->getNodeId(); + if ( nodeId == el_pseudoElem ) { + if ( !_pseudo_elem ) { // not a ::before/after rule + // Our added pseudoElem element should not match any other rules + // (if we added it as a child of a P element, it should not match P > *) + return false; + } + else { + // We might be the pseudoElem that was created by this selector. + // Start checking the rules starting from the real parent. + node = node->getUnboxedParent(); + nodeId = node->getNodeId(); + } + } + else if ( _id==0 && node->isBoxingNode() ) { + // Don't apply "... *" or '.classname' selectors to boxing nodes + // (but let those with our internal element names ("... autoBoxing") be applied) + return false; + } // check main Id - if (_id!=0 && node->getNodeId() != _id) + if (_id!=0 && nodeId != _id) return false; if (!_rules) return true; @@ -3133,11 +3468,17 @@ LVCssSelectorRule * parse_attr( const char * &str, lxmlDocBase * doc ) } else if ( *str==':' ) { // E:pseudo-class (eg: E:first-child) str++; - if (*str==':') // pseudo element (double ::, eg: E::first-line) are not supported + if (*str==':') { + // pseudo element (double ::, eg: E::first-line) are not supported, + // except ::before/after which are handled in LVCssSelector::parse() + str--; return NULL; + } int n = parse_name( str, css_pseudo_classes, -1 ); - if (n == -1) // not one of out supported pseudo classes + if (n == -1) { // not one of out supported pseudo classes + str--; // LVCssSelector::parse() will also check for :before/after with a single ':' return NULL; + } attrvalue[0] = 0; if (*str=='(') { // parse () content str++; @@ -3324,12 +3665,13 @@ bool LVCssSelector::parse( const char * &str, 
lxmlDocBase * doc ) // to lowercase them here too to expect a match. lString16 element(ident); if ( element.length() < 8 ) { - // Avoid following string comparisons if element + // Avoid following string comparisons if element name string // is shorter than the shortest of them (floatBox) element = element.lowercase(); } else if ( element != "DocFragment" && element != "autoBoxing" && element != "tabularBox" && - element != "floatBox" && element != "inlineBox" && element != "FictionBook" ) { + element != "floatBox" && element != "inlineBox" && element != "pseudoElem" && + element != "FictionBook" ) { element = element.lowercase(); } _id = doc->getElementNameIndex( element.c_str() ); @@ -3354,8 +3696,28 @@ bool LVCssSelector::parse( const char * &str, lxmlDocBase * doc ) while ( *str == '[' || *str=='.' || *str=='#' || *str==':' ) { LVCssSelectorRule * rule = parse_attr( str, doc ); - if (!rule) + if (!rule) { + // Might be one of our supported pseudo elements, which should + // start with "::" but might start with a single ":". + // These pseudo element do not add a LVCssSelectorRule. + if ( *str==':' ) { + str++; + if ( *str==':' ) // skip double :: + str++; + int n = parse_name( str, css_pseudo_elements, -1 ); + if (n != -1) { + _pseudo_elem = n+1; // starts at 1 + _specificity += WEIGHT_SPECIFICITY_ELEMENT; + // Done with this selector: we expect ::before and ::after + // to come always last, and are not followed by other rules. + // ("x::before::before" seems not ensured by Firefox - if we + // stop between them, the 2nd "::before" will make the parsing + // of the declaration invalid, and so this rule.) 
+ return true; + } + } return false; + } insertRuleStart( rule ); //insertRuleAfterStart //insertRuleAfterStart( rule ); //insertRuleAfterStart _specificity += rule->getWeight(); @@ -3435,6 +3797,55 @@ static bool skip_until_end_of_rule( const char * &str ) return *str != 0; } +void LVCssSelector::applyToPseudoElement( const ldomNode * node, css_style_rec_t * style ) const +{ + // This might be called both on the node that match the selector (we should + // not apply to the style of this node), and on the actual pseudo element + // once it has been created as a child (to which we should apply). + css_style_rec_t * target_style = NULL; + if ( node->getNodeId() == el_pseudoElem ) { + if ( ( _pseudo_elem == csspe_before && node->hasAttribute(attr_Before) ) + || ( _pseudo_elem == csspe_after && node->hasAttribute(attr_After) ) ) { + target_style = style; + } + } + else { + // For the matching node, we create two style slots to which we apply + // the declaration. This is just to have all styles applied and see + // at the end if the pseudo element is display:none or not, and if + // it should be skipped or created. + // These css_style_rec_t are just temp slots to gather what's applied, + // they are not the ones that will be associated to the pseudo element. + if ( _pseudo_elem == csspe_before ) { + if ( !style->pseudo_elem_before_style ) { + style->pseudo_elem_before_style = new css_style_rec_t; + } + target_style = style->pseudo_elem_before_style; + } + else if ( _pseudo_elem == csspe_after ) { + if ( !style->pseudo_elem_after_style ) { + style->pseudo_elem_after_style = new css_style_rec_t; + } + target_style = style->pseudo_elem_after_style; + } + } + + if ( target_style ) { + if ( !(target_style->flags & STYLE_REC_FLAG_MATCHED ) ) { + // pseudoElem starts with "display: none" (in case they were created and + // inserted in the DOM by a CSS selector that can later disappear). + // Switch them to "display: inline" when we meet such a selector. 
+ // (The coming up _decl->apply() may not update ->display, or it may set + // it explicitely to css_d_none, that we don't want reset to inline.) + target_style->display = css_d_inline; + target_style->flags |= STYLE_REC_FLAG_MATCHED; + } + // And apply this selector styling. + _decl->apply(target_style); + } + return; +} + LVCssSelectorRule::LVCssSelectorRule( LVCssSelectorRule & v ) : _type(v._type), _id(v._id), _attrid(v._attrid) , _next(NULL) @@ -3445,7 +3856,7 @@ LVCssSelectorRule::LVCssSelectorRule( LVCssSelectorRule & v ) } LVCssSelector::LVCssSelector( LVCssSelector & v ) -: _id(v._id), _decl(v._decl), _specificity(v._specificity), _next(NULL), _rules(NULL) +: _id(v._id), _decl(v._decl), _specificity(v._specificity), _pseudo_elem(v._pseudo_elem), _next(NULL), _rules(NULL) { if ( v._next ) _next = new LVCssSelector( *v._next ); @@ -3481,6 +3892,10 @@ void LVStyleSheet::apply( const ldomNode * node, css_style_rec_t * style ) return; // no rules! lUInt16 id = node->getNodeId(); + if ( id == el_pseudoElem ) { // get the id chain from the parent element + // Note that a "div:before {float:left}" will result in:

+ id = node->getUnboxedParent()->getNodeId(); + } // _selectors[0] holds the ordered chain of selectors starting (from // the right of the selector) with a rule with no element name attached @@ -3547,6 +3962,7 @@ lUInt32 LVCssSelector::getHash() } hash = hash * 31 + nextHash; hash = hash * 31 + _specificity; + hash = hash * 31 + _pseudo_elem; if (!_decl.isNull()) hash = hash * 31 + _decl->getHash(); return hash; diff --git a/crengine/src/lvstyles.cpp b/crengine/src/lvstyles.cpp index 8e8e8bc5d..a070e652b 100644 --- a/crengine/src/lvstyles.cpp +++ b/crengine/src/lvstyles.cpp @@ -106,7 +106,8 @@ lUInt32 calcHash(css_style_rec_t & rec) + (lUInt32)rec.direction) * 31 + (lUInt32)rec.cr_hint) * 31 + (lUInt32)rec.font_name.getHash() - + (lUInt32)rec.background_image.getHash()); + + (lUInt32)rec.background_image.getHash() + + (lUInt32)rec.content.getHash()); return rec.hash; } @@ -170,6 +171,7 @@ bool operator == (const css_style_rec_t & r1, const css_style_rec_t & r2) r1.float_ == r2.float_&& r1.clear == r2.clear&& r1.direction == r2.direction&& + r1.content == r2.content&& r1.cr_hint==r2.cr_hint; } @@ -357,6 +359,7 @@ bool css_style_rec_t::serialize( SerialBuf & buf ) ST_PUT_ENUM(float_); ST_PUT_ENUM(clear); ST_PUT_ENUM(direction); + buf << content; ST_PUT_ENUM(cr_hint); lUInt32 hash = calcHash(*this); buf << hash; @@ -416,6 +419,7 @@ bool css_style_rec_t::deserialize( SerialBuf & buf ) ST_GET_ENUM(css_float_t, float_); ST_GET_ENUM(css_clear_t, clear); ST_GET_ENUM(css_direction_t, direction); + buf>>content; ST_GET_ENUM(css_cr_hint_t, cr_hint); lUInt32 hash = 0; buf >> hash; diff --git a/crengine/src/lvtinydom.cpp b/crengine/src/lvtinydom.cpp index 758205c47..f79c4343f 100644 --- a/crengine/src/lvtinydom.cpp +++ b/crengine/src/lvtinydom.cpp @@ -84,7 +84,7 @@ int gDOMVersionRequested = DOM_VERSION_CURRENT; /// change in case of incompatible changes in swap/cache file format to avoid using incompatible swap file // increment to force complete reload/reparsing of old file 
-#define CACHE_FILE_FORMAT_VERSION "3.05.40k" +#define CACHE_FILE_FORMAT_VERSION "3.05.41k" /// increment following value to force re-formatting of old book after load #define FORMATTING_VERSION_ID 0x0023 @@ -3627,7 +3627,7 @@ static void writeNode( LVStream * stream, ldomNode * node, bool treeLayout ) #define WRITENODEEX_NB_SKIPPED_CHARS 0x0100 ///< show number of skipped chars in text nodes: (...43...) #define WRITENODEEX_NB_SKIPPED_NODES 0x0200 ///< show number of skipped sibling nodes: [...17...] #define WRITENODEEX_SHOW_REND_METHOD 0x0400 ///< show rendering method at end of tag (
=Final, =Inline...) -#define WRITENODEEX_UNUSED_2 0x0800 ///< +#define WRITENODEEX_SHOW_MISC_INFO 0x0800 ///< show additional info (depend on context) #define WRITENODEEX_ADD_UPPER_DIR_LANG_ATTR 0x1000 ///< add dir= and lang= grabbed from upper nodes #define WRITENODEEX_GET_CSS_FILES 0x2000 ///< ensure css files that apply to initial node are returned /// in &cssFiles (needed when not starting from root node) @@ -3913,6 +3913,18 @@ static void writeNodeEx( LVStream * stream, ldomNode * node, lString16Collection // rendering method, which gives us a visual hint of it. lvdom_element_render_method rm = node->getRendMethod(); // Text and inline nodes stay stuck together, but not all others + if (rm == erm_invisible) { + // We don't know how invisible nodes would be displayed if + // they were visible. Make the invisible tree like inline + // among finals, so they don't take too much height. + if (node->getParentNode()) { + rm = node->getParentNode()->getRendMethod(); + if (rm == erm_invisible || rm == erm_inline || rm == erm_final) + rm = erm_inline; + else + rm = erm_final; + } + } if ( (rm != erm_inline && rm != erm_runin) || node->isBoxingInlineBox()) { doNewLineBeforeStartTag = true; doNewLineAfterStartTag = true; @@ -3973,7 +3985,6 @@ static void writeNodeEx( LVStream * stream, ldomNode * node, lString16Collection } } } - // Do something specific when erm_invisible ? 
} if ( containsStart && WNEFLAG(NB_SKIPPED_NODES) ) { @@ -4018,6 +4029,21 @@ static void writeNodeEx( LVStream * stream, ldomNode * node, lString16Collection lString8 attrName( UnicodeToUtf8(node->getDocument()->getAttrName(attr->id)) ); lString8 nsName( UnicodeToUtf8(node->getDocument()->getNsName(attr->nsid)) ); lString8 attrValue( UnicodeToUtf8(node->getDocument()->getAttrValue(attr->index)) ); + if ( WNEFLAG(SHOW_MISC_INFO) ) { + if ( node->getNodeId() == el_pseudoElem && (attr->id == attr_Before || attr->id == attr_After) ) { + // Show the rendered content as the otherwise empty Before/After attribute value + if ( WNEFLAG(TEXT_SHOW_UNICODE_CODEPOINT) ) { + lString16 content = get_applied_content_property(node); + attrValue.empty(); + for ( int i=0; i 0 ) *stream << nsName << ":"; @@ -4480,6 +4506,10 @@ bool ldomDocument::render( LVRendPageList * pages, LVDocViewCallback * callback, //CRLog::trace("validate 2..."); //validateDocument(); + // initNodeRendMethod() (and setNodeStyle() with pseudo elements) may wish to box or + // create elements, but may be prevented from doing so by an existing cache file + _boxingWishedButPreventedByCache = false; + CRLog::trace("Save stylesheet..."); _stylesheet.push(); CRLog::trace("Init node styles..."); @@ -4488,10 +4518,6 @@ bool ldomDocument::render( LVRendPageList * pages, LVDocViewCallback * callback, CRLog::trace("Restoring stylesheet..."); _stylesheet.pop(); - // initNodeRendMethod may wish to box elements, but may be - // prevented from doing so by an existing cache file - _boxingWishedButPreventedByCache = false; - CRLog::trace("init render method..."); getRootNode()->initNodeRendMethodRecursive(); @@ -4827,7 +4853,8 @@ bool IsEmptySpace( const lChar16 * text, int len ) static bool IS_FIRST_BODY = false; ldomElementWriter::ldomElementWriter(ldomDocument * document, lUInt16 nsid, lUInt16 id, ldomElementWriter * parent) - : _parent(parent), _document(document), _tocItem(NULL), _isBlock(true), _isSection(false), 
_stylesheetIsSet(false), _bodyEnterCalled(false) + : _parent(parent), _document(document), _tocItem(NULL), _isBlock(true), _isSection(false), + _stylesheetIsSet(false), _bodyEnterCalled(false), _pseudoElementAfterChildIndex(-1) { //logfile << "{c"; _typeDef = _document->getElementTypePtr( id ); @@ -4982,6 +5009,28 @@ void ldomElementWriter::onBodyEnter() // CRLog::error("error while style initialization of element %x %s", _element->getNodeIndex(), LCSTR(_element->getNodeName()) ); // crFatalError(); // } + int nb_children = _element->getChildCount(); + if ( nb_children > 0 ) { + // The only possibility for this element being built to have children + // is if the above initNodeStyle() has applied to this node some + // matching selectors that had ::before or ::after, which have then + // created one or two pseudoElem children. But let's be sure of that. + for ( int i=0; igetChildNode(i); + if ( child->getNodeId() == el_pseudoElem ) { + // ->initNodeStyle() has been done when the element was created; + // as pseudo elements have no children, let's ->initNodeRendMethod() + // now (as done in onBodyExit()). + child->initNodeRendMethod(); + // ldomNode::ensurePseudoElement() will always have inserted + // "Before" first, and "After" second. But real children might + // soon be added, and we'll have to move "After" last when done. + // Which will be done in onBodyExit(). + if ( child->hasAttribute(attr_After) ) + _pseudoElementAfterChildIndex = i; + } + } + } _isBlock = isBlockNode(_element); // If initNodeStyle() has set "white-space: pre" or alike, update _flags if ( _element->getStyle()->white_space >= css_ws_pre_line) { @@ -5001,6 +5050,63 @@ void ldomElementWriter::onBodyEnter() #endif } +void ldomNode::ensurePseudoElement( bool is_before ) { +#if BUILD_LITE!=1 + // This node should have that pseudoElement, but it might already be there, + // so check if there is already one, and if not, create it. 
+ // This happens usually in the initial loading phase, but it might in + // a re-rendering if the pseudo element is introduced by a change in + // styles (we won't be able to create a node if there's a cache file). + int insertChildIndex = -1; + int nb_children = getChildCount(); + if ( is_before ) { // ::before + insertChildIndex = 0; // always to be inserted first, if not already there + if ( nb_children > 0 ) { + ldomNode * child = getChildNode(0); // should always be found as the first node + // pseudoElem might have been wrapped by a inlineBox, autoBoxing, floatBox... + while ( child && child->isBoxingNode() && child->getChildCount()>0 ) + child = child->getChildNode(0); + if ( child && child->getNodeId() == el_pseudoElem && child->hasAttribute(attr_Before) ) { + // Already there, no need to create it + insertChildIndex = -1; + } + } + } + else { // ::after + // In the XML loading phase, this one might be either first, + // or second if there's already a Before. In the re-rendering + // phase, it would have been moved as the last node. In all these + // cases, it is always the last at the moment we are checking. + insertChildIndex = nb_children; // always to be inserted last, if not already there + if ( nb_children > 0 ) { + ldomNode * child = getChildNode(nb_children-1); // should always be found as the last node + // pseudoElem might have been wrapped by a inlineBox, autoBoxing, floatBox... + while ( child && child->isBoxingNode() && child->getChildCount()>0 ) + child = child->getChildNode(0); + if ( child && child->getNodeId() == el_pseudoElem && child->hasAttribute(attr_After) ) { + // Already there, no need to create it + insertChildIndex = -1; + } + } + } + if ( insertChildIndex >= 0 ) { + if ( getDocument()->hasCacheFile() ) { + getDocument()->setBoxingWishedButPreventedByCache(); + } + else { + ldomNode * pseudo = insertChildElement( insertChildIndex, LXML_NS_NONE, el_pseudoElem ); + lUInt16 attribute_id = is_before ? 
attr_Before : attr_After; + pseudo->setAttributeValue(LXML_NS_NONE, attribute_id, L""); + // We are called by lvrend.cpp setNodeStyle(), after the parent + // style and font have been fully set up. + // We can set this pseudo element style as it can now properly inherit. + pseudo->initNodeStyle(); + } + } + +#endif +} + #if BUILD_LITE!=1 static void resetRendMethodToInline( ldomNode * node ) { @@ -5257,6 +5363,12 @@ static void detectChildTypes( ldomNode * parent, bool & hasBlockItems, bool & ha hasBlockItems = false; hasInline = false; hasFloating = false; + if ( parent->getNodeId() == el_pseudoElem ) { + // pseudoElem (generated from CSS ::before and ::after), will have + // some (possibly empty) plain text content. + hasInline = true; + return; // and it has no children + } int len = parent->getChildCount(); for ( int i=len-1; i>=0; i-- ) { ldomNode * node = parent->getChildNode(i); @@ -5586,7 +5698,7 @@ bool hasInvisibleParent( ldomNode * node ) return false; } -bool ldomNode::isFloatingBox() +bool ldomNode::isFloatingBox() const { // BLOCK_RENDERING_G(FLOAT_FLOATBOXES) is what triggers rendering // the floats floating. 
They are wrapped in a floatBox, possibly @@ -5599,7 +5711,7 @@ bool ldomNode::isFloatingBox() /// is node an inlineBox that has not been re-inlined by having /// its child no more inline-block/inline-table -bool ldomNode::isBoxingInlineBox() +bool ldomNode::isBoxingInlineBox() const { // BLOCK_RENDERING_G(BOX_INLINE_BLOCKS) is what ensures inline-block // are boxed and rendered as an inline block, but we may have them @@ -5620,7 +5732,7 @@ bool ldomNode::isBoxingInlineBox() /// is node an inlineBox that wraps a bogus embedded block (not inline-block/inline-table) /// can be called with inline_box_checks_done=true when isBoxingInlineBox() has already /// been called to avoid rechecking what is known -bool ldomNode::isEmbeddedBlockBoxingInlineBox(bool inline_box_checks_done) +bool ldomNode::isEmbeddedBlockBoxingInlineBox(bool inline_box_checks_done) const { if ( !inline_box_checks_done ) { if ( getNodeId() != el_inlineBox || !BLOCK_RENDERING_G(BOX_INLINE_BLOCKS) ) @@ -6611,6 +6723,14 @@ void ldomElementWriter::onBodyExit() if ( !_bodyEnterCalled ) { onBodyEnter(); } + if ( _pseudoElementAfterChildIndex >= 0 ) { + if ( _pseudoElementAfterChildIndex != _element->getChildCount()-1 ) { + // Not the last child: move it there + // printf("moving After from %d to %d\n", _pseudoElementAfterChildIndex, _element->getChildCount()-1); + // moveItemsTo() just works to remove it, and re-add it (so, adding it at the end) + _element->moveItemsTo( _element, _pseudoElementAfterChildIndex, _pseudoElementAfterChildIndex); + } + } // if ( _element->getStyle().isNull() ) { // lString16 path; // ldomNode * p = _element->getParentNode(); @@ -8430,7 +8550,9 @@ bool ldomXPointer::getRect(lvRect & rect, bool extended, bool adjusted) const static bool isBoxingNode(ldomNode * node) { - return node->isBoxingNode(); + // In the context this is used (xpointers), handle pseudoElems (that don't + // box anything) just as boxing nodes: ignoring them in XPointers. 
+ return node->isBoxingNode(true); } static bool isTextNode(ldomNode * node) @@ -8771,7 +8893,7 @@ lString16 ldomXPointer::toStringV2() ldomNode * node = getNode(); int offset = getOffset(); ldomNode * p = node; - if ( !node->isBoxingNode() ) { + if ( !node->isBoxingNode(true) ) { // (nor pseudoElem) if ( offset >= 0 ) { path << "." << fmt::decimal(offset); } @@ -15407,7 +15529,7 @@ void ldomNode::setRendMethod( lvdom_element_render_method method ) #if BUILD_LITE!=1 /// returns element style record -css_style_ref_t ldomNode::getStyle() +css_style_ref_t ldomNode::getStyle() const { ASSERT_NODE_NOT_NULL; if ( !isElement() ) @@ -15536,13 +15658,16 @@ void ldomNode::initNodeStyle() } #endif -bool ldomNode::isBoxingNode() +bool ldomNode::isBoxingNode( bool orPseudoElem ) const { if( isElement() ) { lUInt16 id = getNodeId(); if( id >= el_autoBoxing && id <= el_inlineBox ) { return true; } + if ( orPseudoElem && id == el_pseudoElem ) { + return true; + } } return false; } @@ -15555,12 +15680,16 @@ ldomNode * ldomNode::getUnboxedParent() const return parent; } +// The following 4 methods are mostly used when checking CSS siblings/child +// rules and counting list items siblings: we have them skip pseudoElems by +// using isBoxingNode(orPseudoElem=true). 
ldomNode * ldomNode::getUnboxedFirstChild( bool skip_text_nodes ) const { for ( int i=0; iisBoxingNode() ) { + if ( child && child->isBoxingNode(true) ) { child = child->getUnboxedFirstChild( skip_text_nodes ); + // (child will then be NULL if it was a pseudoElem) } if ( child && (!skip_text_nodes || !child->isText()) ) return child; @@ -15572,7 +15701,7 @@ ldomNode * ldomNode::getUnboxedLastChild( bool skip_text_nodes ) const { for ( int i=getChildCount()-1; i>=0; i-- ) { ldomNode * child = getChildNode(i); - if ( child && child->isBoxingNode() ) { + if ( child && child->isBoxingNode(true) ) { child = child->getUnboxedLastChild( skip_text_nodes ); } if ( child && (!skip_text_nodes || !child->isText()) ) @@ -15633,16 +15762,16 @@ ldomNode * ldomNode::getUnboxedNextSibling( bool skip_text_nodes ) const if ( !skip_text_nodes ) return n; } - else if ( !n->isBoxingNode() ) // Not a boxing node + else if ( !n->isBoxingNode(true) ) // Not a boxing node nor pseudoElem return n; - // Otherwise, this node is a boxing node (or a text node with - // no child, and we'll get back to its parent) + // Otherwise, this node is a boxing node (or a text node or a pseudoElem + // with no child, and we'll get back to its parent) } // Enter next node, and re-loop to have it checked // - if !node_entered : n is the parent and index points to the next child // we want to check - // - if n->isBoxingNode() (and node_entered=true, and index=0): enter - // the first child of this boxingNode + // - if n->isBoxingNode() (and node_entered=true, and index=0): enter the first + // child of this boxingNode (not if pseudoElem, that doesn't box anything) if ( (!node_entered || n->isBoxingNode()) && index < n->getChildCount() ) { n = n->getChildNode(index); index = 0; @@ -15676,7 +15805,7 @@ ldomNode * ldomNode::getUnboxedPrevSibling( bool skip_text_nodes ) const if ( !skip_text_nodes ) return n; } - else if ( !n->isBoxingNode() ) + else if ( !n->isBoxingNode(true) ) return n; } if ( (!node_entered || 
n->isBoxingNode()) && index >= 0 && index < n->getChildCount() ) { From 07f428fcf97cf48161d1fc7466194d4018ec200e Mon Sep 17 00:00:00 2001 From: poire-z Date: Thu, 4 Jun 2020 16:45:15 +0200 Subject: [PATCH 10/11] CSS: content: open-quote support via TextLangMan Get the right quote chars for each language, and ensure nested quote levels (per lang_cfg). --- crengine/include/lvstsheet.h | 6 +- crengine/include/textlang.h | 13 ++ crengine/src/lvrend.cpp | 9 ++ crengine/src/lvstsheet.cpp | 145 +++++++++++++++++---- crengine/src/lvtinydom.cpp | 54 ++++++-- crengine/src/textlang.cpp | 246 ++++++++++++++++++++++++++++++++++- 6 files changed, 430 insertions(+), 43 deletions(-) diff --git a/crengine/include/lvstsheet.h b/crengine/include/lvstsheet.h index e5cba4725..53bbdf4e6 100644 --- a/crengine/include/lvstsheet.h +++ b/crengine/include/lvstsheet.h @@ -46,6 +46,7 @@ #include "cssdef.h" #include "lvstyles.h" +#include "textlang.h" class lxmlDocBase; class ldomNode; @@ -330,7 +331,10 @@ class LVStyleSheet { /// parse color value like #334455, #345 or red bool parse_color_value( const char * & str, css_length_t & value ); -/// get computed value for a node from its parsed CSS "content:" value +/// update (if needed) a style->content (parsed from the CSS declaration) before +// applying to a node's style +void update_style_content_property( css_style_rec_t * style, ldomNode * node ); +/// get the computed final text value for a node from its style->content lString16 get_applied_content_property( ldomNode * node ); /// extract @import filename from beginning of CSS diff --git a/crengine/include/textlang.h b/crengine/include/textlang.h index 8644ded69..fd568e2ad 100644 --- a/crengine/include/textlang.h +++ b/crengine/include/textlang.h @@ -80,6 +80,8 @@ class TextLangMan static HyphMethod * getMainLangHyphMethod(); // For HyphMan::hyphenate() + static void resetCounters(); + // For frontend info about TextLangMan status and seen langs static LVPtrVector * getLangCfgList() { 
return &_lang_cfg_list; @@ -99,6 +101,12 @@ class TextLangCfg lString16 _lang_tag; HyphMethod * _hyph_method; + lString16 _open_quote1; + lString16 _close_quote1; + lString16 _open_quote2; + lString16 _close_quote2; + int _quote_nesting_level; + #if USE_HARFBUZZ==1 hb_language_t _hb_language; #endif @@ -110,6 +118,8 @@ class TextLangCfg bool _duplicate_real_hyphen_on_next_line; + void resetCounters(); + public: lString16 getLangTag() const { return _lang_tag; } @@ -129,6 +139,9 @@ class TextLangCfg return _hyph_method; } + lString16 & getOpeningQuote( bool update_level=true ); + lString16 & getClosingQuote( bool update_level=true ); + #if USE_HARFBUZZ==1 hb_language_t getHBLanguage() const { return _hb_language; } #endif diff --git a/crengine/src/lvrend.cpp b/crengine/src/lvrend.cpp index a8392ccc3..cce1ca781 100755 --- a/crengine/src/lvrend.cpp +++ b/crengine/src/lvrend.cpp @@ -9054,6 +9054,15 @@ void setNodeStyle( ldomNode * enode, css_style_ref_t parent_style, LVFontRef par delete pstyle->pseudo_elem_after_style; pstyle->pseudo_elem_after_style = NULL; } + + if ( nodeElementId == el_pseudoElem ) { + // Pseudo element ->content may need some update if it contains + // any of the open-quote-like tokens, to account for the + // quoting nested levels. setNodeStyle() is actually the good + // place to do that, as we're visiting all the nodes recursively. + update_style_content_property(pstyle, enode); + } + pstyle->flags = 0; // cleanup, before setStyle() adds it to cache // set calculated style diff --git a/crengine/src/lvstsheet.cpp b/crengine/src/lvstsheet.cpp index 06179145c..0259a7b2f 100644 --- a/crengine/src/lvstsheet.cpp +++ b/crengine/src/lvstsheet.cpp @@ -899,6 +899,9 @@ bool parse_content_property( const char * & str, lString16 & parsed_content) // 'n' for 'no-close-quote' // 'u' for 'url()', that we don't support // 'z' for unsupported tokens, like gradient()... 
+ // '$' (at start) this content needs post processing before + // being applied to a node's style (needed with quotes, + // to get the correct char for the current nested level). // Note: this parsing might not be super robust with // convoluted declarations... parsed_content.clear(); @@ -906,6 +909,7 @@ bool parse_content_property( const char * & str, lString16 & parsed_content) // The presence of a single 'none' or 'normal' among multiple // values make the whole thing 'none'. bool has_none = false; + bool needs_processing_when_applying = false; while ( skip_spaces( str ) && *str!=';' && *str!='}' && *str!='!' ) { if ( substr_icompare("none", str) ) { has_none = true; @@ -918,18 +922,22 @@ bool parse_content_property( const char * & str, lString16 & parsed_content) } else if ( substr_icompare("open-quote", str) ) { parsed_content << L'Q'; + needs_processing_when_applying = true; continue; } else if ( substr_icompare("close-quote", str) ) { parsed_content << L'q'; + needs_processing_when_applying = true; continue; } else if ( substr_icompare("no-open-quote", str) ) { parsed_content << L'N'; + needs_processing_when_applying = true; continue; } else if ( substr_icompare("no-close-quote", str) ) { parsed_content << L'n'; + needs_processing_when_applying = true; continue; } else if ( substr_icompare("attr", str) ) { @@ -1052,6 +1060,9 @@ bool parse_content_property( const char * & str, lString16 & parsed_content) parsed_content.clear(); parsed_content << L'X'; } + else if ( needs_processing_when_applying ) { + parsed_content.insert(0, 1, L'$'); + } if (*str) // something (;, } or !important) follows return true; // Restore original position if we reach end of CSS string, @@ -1062,6 +1073,104 @@ bool parse_content_property( const char * & str, lString16 & parsed_content) return false; } +/// Update a style->content, post processed for its node +void update_style_content_property( css_style_rec_t * style, ldomNode * node ) { + // We don't want to update too much: 
styles are hashed and shared by + // multiple nodes. We don't resolve "attr()" here as attributes are + // stable (and "attr(id)" would make all style->content different + // and prevent styles from being shared, increasing the number + // of styles to cache). + // But we need to resolve quotes, according to their nesting level, + // and transform them into a literal string 's'. + + if ( style->content.empty() || style->content[0] != L'$' ) { + // No update needed + return; + } + + // We need to know if this node is visible: if not, quotes nested + // level should not be updated. We might want to still include + // the computed quote (with quote char for level 1) for it to be + // displayed by writeNodeEx() when displaying the HTML, even if + // the node is invisible. + bool visible = style->display != css_d_none; + if ( visible ) { + ldomNode * n = node->getParentNode(); + for ( ; !n->isRoot(); n = n->getParentNode() ) { + if ( n->getStyle()->display == css_d_none ) { + visible = false; + break; + } + } + } + + // We do not support specifying quote chars to be used via CSS "quotes": + // :root { quotes: '\201c' '\201d' '\2018' '\2019'; } + // We use the ones hardcoded for the node lang tag language (or default + // typography language) provided by TextLangCfg. + // HTML5 default CSS specifies them with: + // :root:lang(af), :not(:lang(af)) > :lang(af) { quotes: '\201c' '\201d' '\2018' '\2019' } + // This might (or might not) imply that nested levels are reset when entering + // text with another language, so this new language first level quote is used. + // We can actually get that same behaviour by having each TextLangCfg manage + // its own nesting level (which won't be reset when en>fr>en, though). + // But all this is quite rare, so don't bother about it much. + TextLangCfg * lang_cfg = TextLangMan::getTextLangCfg( node ); + + // Note: some quote chars like (U+201C / U+201D) seem to not be mirrored + // (when using HarfBuzz) when added to some RTL arabic text.
But it + // appears that way with Firefox too! + // But if we use another char (U+00AB / U+00BB), it gets mirrored correctly. + // Might be that HarfBuzz first substitutes it with arabic quotes (which + // happen to look inverted), and then mirrors that? + + lString16 res; + lString16 parsed_content = style->content; + lString16 quote; + int i = 1; // skip initial '$' + int parsed_content_len = parsed_content.length(); + while ( i < parsed_content_len ) { + lChar16 ctype = parsed_content[i]; + if ( ctype == 's' ) { // literal string: copy as-is (type char, length char, text) + lChar16 len = parsed_content[i+1]; // the length char follows the type char + res.append(parsed_content, i, len+2); + i += len+2; + } + else if ( ctype == 'a' ) { // attribute value: copy as-is (type char, length char, name) + lChar16 len = parsed_content[i+1]; // the length char follows the type char + res.append(parsed_content, i, len+2); + i += len+2; + } + else if ( ctype == 'Q' ) { // open-quote + quote = lang_cfg->getOpeningQuote(visible); + res << L's' << quote.length() << quote; + i += 1; + } + else if ( ctype == 'q' ) { // close-quote + quote = lang_cfg->getClosingQuote(visible); + res << L's' << quote.length() << quote; + i += 1; + } + else if ( ctype == 'N' ) { // no-open-quote + // This should just increment nested quote level and output nothing. + lang_cfg->getOpeningQuote(visible); + i += 1; + } + else if ( ctype == 'n' ) { // no-close-quote + // This should just decrement nested quote level and output nothing. + lang_cfg->getClosingQuote(visible); + i += 1; + } + else { + // All other stuff are single char (u, z, X) or unsupported/bogus char.
+ res.append(parsed_content, i, 1); + i += 1; + } + } + // Replace style->content with what we built + style->content = res; +} + /// Returns the computed value for a node from its parsed CSS "content:" value lString16 get_applied_content_property( ldomNode * node ) { lString16 res; @@ -1100,38 +1209,24 @@ lString16 get_applied_content_property( ldomNode * node ) { // res << 0x25FD; // WHITE MEDIUM SMALL SQUARE res << 0x2B26; // WHITE MEDIUM DIAMOND } + else if ( ctype == 'X' ) { // 'none' + res.clear(); // should be standalone, but let's be sure + break; + } + else if ( ctype == 'z' ) { // unsupported token + // Just ignore it, don't show anything + } else if ( ctype == 'Q' ) { // open-quote - // Add default quoting opening char - // We do not support showing a different char for multiple nested , - // and neither the way to specify this with CSS, ie: - // q::before { content: open-quote; } - // :root { quotes: '\201c' '\201d' '\2018' '\2019'; } - // todo: have the right quote char for a language provided by lang_cfg - res << 0x201C; - // Note: this specific char seem to not be mirrored (when using HarfBuzz) when - // added to some RTL arabic text. But it appears that way with Firefox too! - // But if we use another char (0x00AB / 0x00BB), it gets mirrored correctly. - // Might be that HarfBuzz first substitute it with arabic quotes (which happen - // to look inverted), and then mirror that? 
+ // Shouldn't happen: replaced earlier by update_style_content_property() } else if ( ctype == 'q' ) { // close-quote - // Add default quoting closing char - res << 0x201D; + // Shouldn't happen: replaced earlier by update_style_content_property() } else if ( ctype == 'N' ) { // no-open-quote - // (This should just increment nested quote level if we supported that) - // Nothing to output + // Shouldn't happen: replaced earlier by update_style_content_property() } else if ( ctype == 'n' ) { // no-close-quote - // (This should just decrement nested quote level if we supported that) - // Nothing to output - } - else if ( ctype == 'X' ) { // 'none' - res.clear(); // should be standalone, but let's be sure - break; - } - else if ( ctype == 'z' ) { // unsupported token - // Just ignore it, don't show anything + // Shouldn't happen: replaced earlier by update_style_content_property() } else { // unexpected break; diff --git a/crengine/src/lvtinydom.cpp b/crengine/src/lvtinydom.cpp index f79c4343f..fe17aac44 100644 --- a/crengine/src/lvtinydom.cpp +++ b/crengine/src/lvtinydom.cpp @@ -4510,6 +4510,9 @@ bool ldomDocument::render( LVRendPageList * pages, LVDocViewCallback * callback, // create elements, but may be prevented from doing so by an existing cache file _boxingWishedButPreventedByCache = false; + // Reset counters (quotes nesting levels...) + TextLangMan::resetCounters(); + CRLog::trace("Save stylesheet..."); _stylesheet.push(); CRLog::trace("Init node styles..."); @@ -5018,16 +5021,24 @@ void ldomElementWriter::onBodyEnter() for ( int i=0; igetChildNode(i); if ( child->getNodeId() == el_pseudoElem ) { - // ->initNodeStyle() has been done when the element was created; - // as pseudo elements have no children, let's ->initNodeRendMethod() - // now (as done in onBodyExit()). - child->initNodeRendMethod(); - // ldomNode::ensurePseudoElement() will always have inserted - // "Before" first, and "After" second. 
But real children might - // soon be added, and we'll have to move "After" last when done. - // Which will be done in onBodyExit(). - if ( child->hasAttribute(attr_After) ) + if ( child->hasAttribute(attr_Before) ) { + // The "Before" pseudo element (not part of the XML) + // needs to have its style applied. As it has no + // children, we can also init its rend method. + child->initNodeStyle(); + child->initNodeRendMethod(); + } + else if ( child->hasAttribute(attr_After) ) { + // For the "After" pseudo element, we need to wait + // for all real children to be added, to move it + // as its right position (last), to init its style + // (because of "content:close-quote", whose nested + // level need to have seen all previous nodes to + // be accurate) and its rendering method. + // We'll do that in onBodyExit() when called for + // this node. _pseudoElementAfterChildIndex = i; + } } } } @@ -5098,9 +5109,18 @@ void ldomNode::ensurePseudoElement( bool is_before ) { lUInt16 attribute_id = is_before ? attr_Before : attr_After; pseudo->setAttributeValue(LXML_NS_NONE, attribute_id, L""); // We are called by lvrend.cpp setNodeStyle(), after the parent - // style and font have been fully set up. - // We can set this pseudo element style as it can now properly inherit. - pseudo->initNodeStyle(); + // style and font have been fully set up. We could set this pseudo + // element style with pseudo->initNodeStyle(), as it can inherit + // properly, but we should not: + // - when re-rendering, initNodeStyleRecursive()/updateStyleDataRecursive() + // will iterate thru this node we just added as a child, and do it. + // - when XML loading, we could do it for the "Before" pseudo element, + // but for the "After" one, we need to wait for all real children to be + // added and have their style applied - just because they can change + // open-quote/close-quote nesting levels - to be sure we get the + // proper nesting level quote char for the After node. 
+ // So, for the XML loading phase, we do that in onBodyEnter() and + // onBodyExit() when called on the parent node. } } @@ -6726,10 +6746,16 @@ void ldomElementWriter::onBodyExit() if ( _pseudoElementAfterChildIndex >= 0 ) { if ( _pseudoElementAfterChildIndex != _element->getChildCount()-1 ) { // Not the last child: move it there - // printf("moving After from %d to %d\n", _pseudoElementAfterChildIndex, _element->getChildCount()-1); - // moveItemsTo() just works to remove it, and re-add it (so, adding it at the end) + // (moveItemsTo() works just fine when the source node is also the + // target node: remove it, and re-add it, so, adding it at the end) _element->moveItemsTo( _element, _pseudoElementAfterChildIndex, _pseudoElementAfterChildIndex); } + // Now that all the real children of this node have had their + // style set, we can init the style of the "After" pseudo + // element, and its rend method as it has no children. + ldomNode * child = _element->getChildNode(_element->getChildCount()-1); + child->initNodeStyle(); + child->initNodeRendMethod(); } // if ( _element->getStyle().isNull() ) { // lString16 path; diff --git a/crengine/src/textlang.cpp b/crengine/src/textlang.cpp index e3e17e12a..c5bc9edc6 100644 --- a/crengine/src/textlang.cpp +++ b/crengine/src/textlang.cpp @@ -232,9 +232,209 @@ HyphMethod * TextLangMan::getMainLangHyphMethod() { return getTextLangCfg()->getHyphMethod(); } +void TextLangMan::resetCounters() { + for ( int i=0; i<_lang_cfg_list.length(); i++ ) { + _lang_cfg_list[i]->resetCounters(); + } +} // TextLangCfg object: per language holder of language specificities +// For CSS "content: open-quote / close-quote" +typedef struct quotes_spec { + const char * lang_tag; + const lChar16 * open_quote_level_1; + const lChar16 * close_quote_level_1; + const lChar16 * open_quote_level_2; + const lChar16 * close_quote_level_2; +} quotes_spec; + +// List built 20200601 from https://html.spec.whatwg.org/multipage/rendering.html#quotes +// 2nd part 
of lang_tag lowercased for easier comparison, and if multiple +// lang_tag with the same starting chars, put the longest first. +// Small issue: 3-letters lang tag not specified here might match +// a 2-letter lang tag specified here ("ito" will get those from "it"). +static quotes_spec _quotes_spec_table[] = { + { "af", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "agq", L"\x201e", L"\x201d", L"\x201a", L"\x2019" }, /* „ ” ‚ ’ */ + { "ak", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "am", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + { "ar", L"\x201d", L"\x201c", L"\x2019", L"\x2018" }, /* ” “ ’ ‘ */ + { "asa", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ast", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "az-cyrl", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + { "az", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "bas", L"\x00ab", L"\x00bb", L"\x201e", L"\x201c" }, /* « » „ “ */ + { "bem", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "bez", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "be", L"\x00ab", L"\x00bb", L"\x201e", L"\x201c" }, /* « » „ “ */ + { "bg", L"\x201e", L"\x201c", L"\x201e", L"\x201c" }, /* „ “ „ “ */ + { "bm", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "bn", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "brx", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "br", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "bs-cyrl", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "bs", L"\x201e", L"\x201d", L"\x2018", L"\x2019" }, /* „ ” ‘ ’ */ + { "ca", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "cgg", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "chr", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "cs", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, 
/* „ “ ‚ ‘ */ + { "cy", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "dav", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "da", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "de", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "dje", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "dsb", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "dua", L"\x00ab", L"\x00bb", L"\x2018", L"\x2019" }, /* « » ‘ ’ */ + { "dyo", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "dz", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ebu", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ee", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "el", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "en", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "es", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "et", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "eu", L"\x201c", L"\x201d", L"\x201c", L"\x201d" }, /* “ ” “ ” */ + { "ewo", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "fa", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + { "ff", L"\x201e", L"\x201d", L"\x201a", L"\x2019" }, /* „ ” ‚ ’ */ + { "fil", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "fi", L"\x201d", L"\x201d", L"\x2019", L"\x2019" }, /* ” ” ’ ’ */ + { "fo", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "fr-ch", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + // { "fr", L"\x00ab", L"\x00bb", L"\x00ab", L"\x00bb" }, /* « » « » */ /* Same pair for both level, bit sad... 
*/ + { "fr", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ /* Better to have "fr" just as "it" */ + { "ga", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "gd", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "gl", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "gsw", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + { "guz", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "gu", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ha", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "he", L"\x201d", L"\x201d", L"\x2019", L"\x2019" }, /* ” ” ’ ’ */ + { "hi", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "hr", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "hsb", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "hu", L"\x201e", L"\x201d", L"\x00bb", L"\x00ab" }, /* „ ” » « */ + { "hy", L"\x00ab", L"\x00bb", L"\x00ab", L"\x00bb" }, /* « » « » */ + { "id", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ig", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "is", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "it", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "ja", L"\x300c", L"\x300d", L"\x300e", L"\x300f" }, /* 「 」 『 』 */ + { "jgo", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + { "jmc", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "kab", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "kam", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ka", L"\x201e", L"\x201c", L"\x00ab", L"\x00bb" }, /* „ “ « » */ + { "kde", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "kea", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "khq", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ki", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, 
/* “ ” ‘ ’ */ + { "kkj", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + { "kk", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "kln", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "km", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "kn", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ko", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ksb", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ksf", L"\x00ab", L"\x00bb", L"\x2018", L"\x2019" }, /* « » ‘ ’ */ + { "ky", L"\x00ab", L"\x00bb", L"\x201e", L"\x201c" }, /* « » „ “ */ + { "lag", L"\x201d", L"\x201d", L"\x2019", L"\x2019" }, /* ” ” ’ ’ */ + { "lb", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "lg", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ln", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "lo", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "lrc", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "lt", L"\x201e", L"\x201c", L"\x201e", L"\x201c" }, /* „ “ „ “ */ + { "luo", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "luy", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "lu", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "lv", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mas", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mer", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mfe", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mgo", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mg", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "mk", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "ml", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mn", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mr", 
L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ms", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mt", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mua", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "my", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "mzn", L"\x00ab", L"\x00bb", L"\x2039", L"\x203a" }, /* « » ‹ › */ + { "naq", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "nb", L"\x00ab", L"\x00bb", L"\x2018", L"\x2019" }, /* « » ‘ ’ */ + { "nd", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ne", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "nl", L"\x2018", L"\x2019", L"\x201c", L"\x201d" }, /* ‘ ’ “ ” */ + { "nmg", L"\x201e", L"\x201d", L"\x00ab", L"\x00bb" }, /* „ ” « » */ + { "nnh", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "nn", L"\x00ab", L"\x00bb", L"\x2018", L"\x2019" }, /* « » ‘ ’ */ + { "nus", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "nyn", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "pa", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "pl", L"\x201e", L"\x201d", L"\x00ab", L"\x00bb" }, /* „ ” « » */ + { "pt-pt", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "pt", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "rn", L"\x201d", L"\x201d", L"\x2019", L"\x2019" }, /* ” ” ’ ’ */ + { "rof", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ro", L"\x201e", L"\x201d", L"\x00ab", L"\x00bb" }, /* „ ” « » */ + { "ru", L"\x00ab", L"\x00bb", L"\x201e", L"\x201c" }, /* « » „ “ */ + { "rwk", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "rw", L"\x00ab", L"\x00bb", L"\x2018", L"\x2019" }, /* « » ‘ ’ */ + { "sah", L"\x00ab", L"\x00bb", L"\x201e", L"\x201c" }, /* « » „ “ */ + { "saq", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "sbp", L"\x201c", L"\x201d", 
L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "seh", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ses", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "sg", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "shi-latn", L"\x00ab", L"\x00bb", L"\x201e", L"\x201d" }, /* « » „ ” */ + { "shi", L"\x00ab", L"\x00bb", L"\x201e", L"\x201d" }, /* « » „ ” */ + { "si", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "sk", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "sl", L"\x201e", L"\x201c", L"\x201a", L"\x2018" }, /* „ “ ‚ ‘ */ + { "sn", L"\x201d", L"\x201d", L"\x2019", L"\x2019" }, /* ” ” ’ ’ */ + { "so", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "sq", L"\x00ab", L"\x00bb", L"\x201c", L"\x201d" }, /* « » “ ” */ + { "sr-latn", L"\x201e", L"\x201c", L"\x2018", L"\x2018" }, /* „ “ ‘ ‘ */ + { "sr", L"\x201e", L"\x201c", L"\x2018", L"\x2018" }, /* „ “ ‘ ‘ */ + { "sv", L"\x201d", L"\x201d", L"\x2019", L"\x2019" }, /* ” ” ’ ’ */ + { "sw", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ta", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "teo", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "te", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "th", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "ti-er", L"\x2018", L"\x2019", L"\x201c", L"\x201d" }, /* ‘ ’ “ ” */ + { "tk", L"\x201c", L"\x201d", L"\x201c", L"\x201d" }, /* “ ” “ ” */ + { "to", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "tr", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "twq", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "tzm", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "uk", L"\x00ab", L"\x00bb", L"\x201e", L"\x201c" }, /* « » „ “ */ + { "ur", L"\x201d", L"\x201c", L"\x2019", L"\x2018" }, /* ” “ ’ ‘ */ + { "uz-cyrl", L"\x201c", L"\x201d", L"\x2018", 
L"\x2019" }, /* “ ” ‘ ’ */ + { "uz", L"\x201c", L"\x201d", L"\x2019", L"\x2018" }, /* “ ” ’ ‘ */ + { "vai-latn", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "vai", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "vi", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "vun", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "xog", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "yav", L"\x00ab", L"\x00bb", L"\x00ab", L"\x00bb" }, /* « » « » */ + { "yo", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "yue-hans", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "yue", L"\x300c", L"\x300d", L"\x300e", L"\x300f" }, /* 「 」 『 』 */ + { "zgh", L"\x00ab", L"\x00bb", L"\x201e", L"\x201d" }, /* « » „ ” */ + { "zh-hant", L"\x300c", L"\x300d", L"\x300e", L"\x300f" }, /* 「 」 『 』 */ + { "zh", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { "zu", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }, /* “ ” ‘ ’ */ + { NULL, NULL, NULL, NULL, NULL } +}; +// Default to quotes for English +static quotes_spec _quotes_spec_default = { "", L"\x201c", L"\x201d", L"\x2018", L"\x2019" }; + #if USE_LIBUNIBREAK==1 lChar16 lb_char_sub_func_polish(const lChar16 * text, int pos, int next_usable) { // https://github.com/koreader/koreader/issues/5645#issuecomment-559193057 @@ -297,9 +497,6 @@ lChar16 lb_char_sub_func_czech_slovak(const lChar16 * text, int pos, int next_us } #endif -TextLangCfg::~TextLangCfg() { -} - // Instantiate a new TextLangCfg with properties adequate to the provided lang_tag TextLangCfg::TextLangCfg( lString16 lang_tag ) { if ( TextLangMan::_no_hyph_method == NULL ) { @@ -464,4 +661,47 @@ TextLangCfg::TextLangCfg( lString16 lang_tag ) { _duplicate_real_hyphen_on_next_line = true; } #endif + + // Language default opening and closing quotes, for CSS + // "q::before { content: open-quote }" and + // "q::after { content: close-quote }" + quotes_spec * quotes = 
&_quotes_spec_default; + for (int i=0; _quotes_spec_table[i].lang_tag!=NULL; i++) { + if ( lang_tag.startsWith( _quotes_spec_table[i].lang_tag ) ) { + quotes = &_quotes_spec_table[i]; + break; + } + } + // Avoid a wrap after/before an opening/close quote. + const lChar16 * quote_joiner = L"\x2060"; + // (Zero width, equivalent to deprecated ZERO WIDTH NO-BREAK SPACE) + // We might want with some languages to use a non-breaking thin space instead. + + _open_quote1 << quotes->open_quote_level_1 << quote_joiner; + _close_quote1 << quote_joiner << quotes->close_quote_level_1; + _open_quote2 << quotes->open_quote_level_2 << quote_joiner; + _close_quote2 << quote_joiner << quotes->close_quote_level_2; + + resetCounters(); +} + +TextLangCfg::~TextLangCfg() { +} + +void TextLangCfg::resetCounters() { + _quote_nesting_level = 0; +} + +lString16 & TextLangCfg::getOpeningQuote( bool update_level ) { + if ( !update_level ) + return _open_quote1; + _quote_nesting_level++; + return (_quote_nesting_level % 2) ? _open_quote1 : _open_quote2; +} + +lString16 & TextLangCfg::getClosingQuote( bool update_level ) { + if ( !update_level ) + return _close_quote1; + _quote_nesting_level--; + return ((_quote_nesting_level+1) % 2) ? _close_quote1 : _close_quote2; } From 71923f261521b25f2f442a9cbc4e4ac5276ad869 Mon Sep 17 00:00:00 2001 From: poire-z Date: Thu, 4 Jun 2020 16:45:18 +0200 Subject: [PATCH 11/11] CSS/Text selection: adds a few "-cr-hint:" tweaks One can use "-cr-hint: text-selection-inline", "text-selection-block" and "text-selection-skip" to target some elements and tweak how their text will appear (or not) in user text selection. Might be useful to exclude the content of ruby annotations (<rt>) from text selection when providing it to dict lookup or translation.
--- crengine/include/cssdef.h | 5 ++++- crengine/src/lvstsheet.cpp | 5 +++++ crengine/src/lvtinydom.cpp | 15 ++++++++++++++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/crengine/include/cssdef.h b/crengine/include/cssdef.h index ce14fe2b0..85a2b8f7a 100644 --- a/crengine/include/cssdef.h +++ b/crengine/include/cssdef.h @@ -350,7 +350,10 @@ enum css_cr_hint_t { css_cr_hint_toc_level5, css_cr_hint_toc_level6, css_cr_hint_toc_ignore, - css_cr_hint_strut_confined + css_cr_hint_strut_confined, + css_cr_hint_text_selection_inline, + css_cr_hint_text_selection_block, + css_cr_hint_text_selection_skip }; /// css length value diff --git a/crengine/src/lvstsheet.cpp b/crengine/src/lvstsheet.cpp index 0259a7b2f..a86b1d1c7 100644 --- a/crengine/src/lvstsheet.cpp +++ b/crengine/src/lvstsheet.cpp @@ -1620,6 +1620,11 @@ static const char * css_cr_hint_names[]={ // baseline and height (it could have been a non-standard named // value for line-height:, but we want to be able to not override // existing line-height: values) + + // Tweak text selection when traversing a node with these hints + "text-selection-inline", // don't add a '\n' before inner text, even if the node happens to be block + "text-selection-block", // add a '\n' before inner text even if the node happens to be inline + "text-selection-skip", // don't include inner text in text selection NULL }; diff --git a/crengine/src/lvtinydom.cpp b/crengine/src/lvtinydom.cpp index fe17aac44..2033e130b 100644 --- a/crengine/src/lvtinydom.cpp +++ b/crengine/src/lvtinydom.cpp @@ -11702,8 +11702,21 @@ class ldomTextCollector : public ldomNodeCallback { #if BUILD_LITE!=1 ldomNode * elem = (ldomNode *)ptr->getNode(); - if ( elem->getRendMethod()==erm_invisible ) + if ( elem->getRendMethod() == erm_invisible ) + return false; + // Allow tweaking that with hints + css_cr_hint_t hint = elem->getStyle()->cr_hint; + if ( hint == css_cr_hint_text_selection_skip ) { return false; + } + else if ( hint == 
css_cr_hint_text_selection_inline ) { + newBlock = false; + return true; + } + else if ( hint == css_cr_hint_text_selection_block ) { + newBlock = true; + return true; + } switch ( elem->getStyle()->display ) { /* case css_d_inherit: