koreader · poire-z · Nov 28, 2019 · Nov 27, 2019 · Nov 27, 2019 · Nov 28, 2019
diff --git a/crengine/src/lvrend.cpp b/crengine/src/lvrend.cpp
@@ -2454,6 +2454,9 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce
             case css_hyph_auto:
                 flags |= LTEXT_HYPHENATE;
                 break;
+            case css_hyph_none:
+                flags &= ~LTEXT_HYPHENATE;
+                break;
             default:
                 break;
         }

diff --git a/crengine/src/lvtextfm.cpp b/crengine/src/lvtextfm.cpp
@@ -2796,62 +2796,113 @@ class LVFormatter {
             if ( deprecatedWrapWidth>normalWrapWidth && unusedPercent>3 ) {
                 lastNormalWrap = lastDeprecatedWrap;
             }
-            // If, with normal wrapping, more than 5% of line is occupied by
-            // spaces, try to find a word (after where we stopped) to hyphenate,
-            // if hyphenation is not forbidden by CSS.
+            // If, with normal wrapping, more than 5% of the line would not be used,
+            // try to find a word (from where we stopped back to lastNormalWrap) to
+            // hyphenate, if hyphenation is not forbidden by CSS.
             // todo: decide if we should hyphenate if bidi is happening up to now
-            if ( lastMandatoryWrap<0 && lastNormalWrap<m_length-1 && unusedPercent > 5 &&
-                !(m_srcs[wordpos]->flags & LTEXT_SRC_IS_OBJECT) && (m_srcs[wordpos]->flags & LTEXT_HYPHENATE) ) {
-                // hyphenate word
-                int start, end;
-                // This will find the word contained at wordpos (or the previous word
-                // if wordpos happens to be a space or some punctuation - no issue
-                // with that as we'll rightly skip the hyphenation attempt below
-                // as 'end' will be < lastNormalWrap)
-                lStr_findWordBounds( m_text, m_length, wordpos, start, end );
-                int len = end-start;
-                if ( len<4 ) {
-                    // too short word found, find next one
-                    // (This seems wrong and a no-op, as it looks like it will find
-                    // the exact same word as the previous call...)
-                    lStr_findWordBounds( m_text, m_length, end-1, start, end );
-                    len = end-start;
-                }
-#if TRACE_LINE_SPLITTING==1
-                if ( len>0 ) {
-                    CRLog::trace("wordBounds(%s) unusedSpace=%d wordWidth=%d", LCSTR(lString16(m_text+start, len)), unusedSpace, m_widths[end]-m_widths[start]);
-                    TR("wordBounds(%s) unusedSpace=%d wordWidth=%d", LCSTR(lString16(m_text+start, len)), unusedSpace, m_widths[end]-m_widths[start]);
-                }
-#endif
-                if ( start<end && start<wordpos && end>=lastNormalWrap && len>=MIN_WORD_LEN_TO_HYPHENATE ) {
-                    if ( len > MAX_WORD_SIZE )
+            if ( lastMandatoryWrap<0 && lastNormalWrap<m_length-1 && unusedPercent > 5 ) {
+                // There may be more than one word between wordpos and lastNormalWrap (or
+                // pos, the start of this line): if hyphenation is not possible with
+                // the rightmost one, we have to try the previous words.
+                // #define DEBUG_HYPH_EXTRA_LOOPS // Uncomment for debugging loops
+                #ifdef DEBUG_HYPH_EXTRA_LOOPS
+                    int debug_loop_num = 0;
+                #endif
+                int wordpos_min = lastNormalWrap > pos ? lastNormalWrap : pos;
+                while ( wordpos > wordpos_min ) {
+                    if ( m_srcs[wordpos]->flags & LTEXT_SRC_IS_OBJECT ) {
+                        wordpos--; // skip images & floats
+                        continue;
+                    }
+                    #ifdef DEBUG_HYPH_EXTRA_LOOPS
+                        debug_loop_num++;
+                        if (debug_loop_num > 1)
+                            printf("hyph loop #%d checking: %s\n", debug_loop_num,
+                                LCSTR(lString16(m_text+wordpos_min, i-wordpos_min+1)));
+                    #endif
+                    if ( !(m_srcs[wordpos]->flags & LTEXT_HYPHENATE) ) {
+                        // The word at wordpos can't be hyphenated, but it might be
+                        // allowed on some earlier word in another text node.
+                        // As this is a rare situation (they are mostly all hyphenatable,
+                        // or none of them are), and to skip some loops, as the min size
+                        // of a word to go look for hyphenation is 4, skip by 4 chars.
+                        wordpos = wordpos - MIN_WORD_LEN_TO_HYPHENATE;
+                        continue;
+                    }
+                    // lStr_findWordBounds() will find the word contained at wordpos
+                    // (or the previous word if wordpos happens to be a space or some
+                    // punctuation) by looking only for alpha chars in m_text.
+                    // Note: it actually does that with the char at wordpos-1 - not sure
+                    // if we should correct it, here or there - or if this is fine - but
+                    // let's go with it as-is as it might be a safety and might help
+                    // us not be stuck in some infinite loop here.
+                    int start, end;
+                    lStr_findWordBounds( m_text, m_length, wordpos, start, end );
+                    if ( end <= lastNormalWrap ) {
+                        // We passed back lastNormalWrap: no need to look for more
+                        break;
+                    }
+                    int len = end - start;
+                    if ( len < MIN_WORD_LEN_TO_HYPHENATE ) {
+                        // Too short word found, skip it
+                        wordpos = start - 1;
+                        continue;
+                    }
+                    if ( start >= wordpos ) {
+                        // Shouldn't happen, but let's be sure we don't get stuck
+                        wordpos = wordpos - MIN_WORD_LEN_TO_HYPHENATE;
+                        continue;
+                    }
+                    #ifdef DEBUG_HYPH_EXTRA_LOOPS
+                        if (debug_loop_num > 1)
+                            printf("  hyphenating: %s\n", LCSTR(lString16(m_text+start, len)));
+                    #endif
+                    #if TRACE_LINE_SPLITTING==1
+                        TR("wordBounds(%s) unusedSpace=%d wordWidth=%d",
+                                LCSTR(lString16(m_text+start, len)), unusedSpace, m_widths[end]-m_widths[start]);
+                    #endif
+                    // We have a valid word to look for hyphenation
+                    if ( len > MAX_WORD_SIZE ) // hyphenate() stops/truncates at 64 chars
                         len = MAX_WORD_SIZE;
                     // HyphMan::hyphenate(), which is used by some other parts of the code,
                     // expects a lUInt8 array. We added flagSize=1|2 so it can set the correct
                     // flags on our upgraded (from lUInt8 to lUInt16) m_flags.
                     lUInt8 * flags = (lUInt8*) (m_flags + start);
+                    // Fill static array with cumulative widths relative to word start
                     static lUInt16 widths[MAX_WORD_SIZE];
                     int wordStart_w = start>0 ? m_widths[start-1] : 0;
                     for ( int i=0; i<len; i++ ) {
                         widths[i] = m_widths[start+i] - wordStart_w;
                     }
                     int max_width = maxWidth + spaceReduceWidth - x - (wordStart_w - w0) - firstCharMargin;
-                    int _hyphen_width = ((LVFont*)m_srcs[wordpos]->t.font)->getHyphenWidth();
+                    // In some rare cases, a word here can be made with parts from multiple text nodes.
+                    // Use the font of the text node at start to compute the hyphen width, which
+                    // might then be wrong - but that will be smoothed by alignLine()
+                    int _hyphen_width = ((LVFont*)m_srcs[start]->t.font)->getHyphenWidth();
                     if ( HyphMan::hyphenate(m_text+start, len, widths, flags, _hyphen_width, max_width, 2) ) {
-                        for ( int i=0; i<len; i++ )
-                            if ( (m_flags[start+i] & LCHAR_ALLOW_HYPH_WRAP_AFTER)!=0 ) {
-                                if ( widths[i]+_hyphen_width>max_width ) {
+                        for ( int i=0; i<len; i++ ) {
+                            if ( m_flags[start+i] & LCHAR_ALLOW_HYPH_WRAP_AFTER ) {
+                                if ( widths[i] + _hyphen_width > max_width ) {
                                     TR("hyphen found, but max width reached at char %d", i);
                                     break; // hyph is too late
                                 }
-                                if ( start + i > pos+1 )
+                                if ( start + i > pos+1 ) {
                                     lastHyphWrap = start + i;
+                                    // Keep looking for some other candidates in that word
+                                }
                             }
-                    } else {
-                        TR("no hyphen found - max_width=%d", max_width);
+                        }
+                        if ( lastHyphWrap >= 0 ) {
+                            // Found in this word, no need to look at previous words
+                            break;
+                        }
                     }
+                    TR("no hyphen found - max_width=%d", max_width);
+                    // Look at previous words if any
+                    wordpos = start - 1;
                 }
             }
+
             // Find best position to end this line
             int wrapPos = lastHyphWrap;
             if ( lastMandatoryWrap>=0 )

diff --git a/crengine/src/lvtinydom.cpp b/crengine/src/lvtinydom.cpp
@@ -3517,10 +3517,10 @@ static void writeNode( LVStream * stream, ldomNode * node, bool treeLayout )
 }
 
 // Extended version of previous function for displaying selection HTML, with tunable output
-#define WRITENODEEX_TEXT_UNESCAPED               0x0001 ///< let &, < and > unescaped in text nodes (makes HTML invalid)
+#define WRITENODEEX_ADD_UPPER_DIR_LANG_ATTR      0x0001 ///< add dir= and lang= grabbed from upper nodes
 #define WRITENODEEX_TEXT_MARK_NODE_BOUNDARIES    0x0002 ///< mark start and end of text nodes (useful when indented)
 #define WRITENODEEX_TEXT_SHOW_UNICODE_CODEPOINT  0x0004 ///< show unicode codepoint after char
-#define WRITENODEEX_UNUSED_2                     0x0008 ///<
+#define WRITENODEEX_TEXT_UNESCAPED               0x0008 ///< let &, < and > unescaped in text nodes (makes HTML invalid)
 #define WRITENODEEX_INDENT_NEWLINE               0x0010 ///< indent newlines according to node level
 #define WRITENODEEX_NEWLINE_BLOCK_NODES          0x0020 ///< start only nodes rendered as block/final on a new line,
                                                         ///  so inline elements and text nodes are stuck together
@@ -3592,9 +3592,32 @@ static void writeNodeEx( LVStream * stream, ldomNode * node, lString16Collection
     }
 
     bool isInitialNode = false;
+    lString16 initialDirAttribute = lString16::empty_str;
+    lString16 initialLangAttribute = lString16::empty_str;
     if (indentBaseLevel < 0) { // initial call (recursive ones will have it >=0)
         indentBaseLevel = node->getNodeLevel();
         isInitialNode = true;
+        if ( WNEFLAG(ADD_UPPER_DIR_LANG_ATTR) && !node->isRoot() ) {
+            // Grab any dir="rtl" and lang="ar_AA" attributes from some parent node
+            if ( !node->hasAttribute( attr_dir ) ) {
+                ldomNode *pnode = node->getParentNode();
+                for ( ; pnode && !pnode->isNull() && !pnode->isRoot(); pnode = pnode->getParentNode() ) {
+                    if ( pnode->hasAttribute(attr_dir) ) {
+                        initialDirAttribute = pnode->getAttributeValue(attr_dir);
+                        break;
+                    }
+                }
+            }
+            if ( !node->hasAttribute( attr_lang ) ) {
+                ldomNode *pnode = node->getParentNode();
+                for ( ; pnode && !pnode->isNull() && !pnode->isRoot(); pnode = pnode->getParentNode() ) {
+                    if ( pnode->hasAttribute(attr_lang) ) {
+                        initialLangAttribute = pnode->getAttributeValue(attr_lang);
+                        break;
+                    }
+                }
+            }
+        }
     }
     int level = node->getNodeLevel();
     if ( node->isText() && isAfterStart && isBeforeEnd ) {
@@ -3782,6 +3805,15 @@ static void writeNodeEx( LVStream * stream, ldomNode * node, lString16Collection
         if ( !elemNsName.empty() )
             elemName = elemNsName + ":" + elemName;
         *stream << "<" << elemName;
+        if ( isInitialNode ) {
+            // Add any dir="rtl" and lang="ar_AA" attributes grabbed from some parent node
+            if ( !initialDirAttribute.empty() ) {
+                *stream << " dir=\"" << UnicodeToUtf8(initialDirAttribute) << "\"";
+            }
+            if ( !initialLangAttribute.empty() ) {
+                *stream << " lang=\"" << UnicodeToUtf8(initialLangAttribute) << "\"";
+            }
+        }
         for ( int i=0; i<(int)node->getAttrCount(); i++ ) {
             const lxmlAttribute * attr = node->getAttribute(i);
             if (attr) {