Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Text: look for hyphenation in more words if needed #320

Merged
merged 3 commits into from
Nov 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions crengine/src/lvrend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2454,6 +2454,9 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce
case css_hyph_auto:
flags |= LTEXT_HYPHENATE;
break;
case css_hyph_none:
flags &= ~LTEXT_HYPHENATE;
break;
default:
break;
}
Expand Down
121 changes: 86 additions & 35 deletions crengine/src/lvtextfm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2796,62 +2796,113 @@ class LVFormatter {
if ( deprecatedWrapWidth>normalWrapWidth && unusedPercent>3 ) {
lastNormalWrap = lastDeprecatedWrap;
}
// If, with normal wrapping, more than 5% of line is occupied by
// spaces, try to find a word (after where we stopped) to hyphenate,
// if hyphenation is not forbidden by CSS.
// If, with normal wrapping, more than 5% of the line would not be used,
// try to find a word (from where we stopped back to lastNormalWrap) to
// hyphenate, if hyphenation is not forbidden by CSS.
// todo: decide if we should hyphenate if bidi is happening up to now
if ( lastMandatoryWrap<0 && lastNormalWrap<m_length-1 && unusedPercent > 5 &&
!(m_srcs[wordpos]->flags & LTEXT_SRC_IS_OBJECT) && (m_srcs[wordpos]->flags & LTEXT_HYPHENATE) ) {
// hyphenate word
int start, end;
// This will find the word contained at wordpos (or the previous word
// if wordpos happens to be a space or some punctuation - no issue
// with that as we'll rightly skip the hyphenation attempt below
// as 'end' will be < lastNormalWrap)
lStr_findWordBounds( m_text, m_length, wordpos, start, end );
int len = end-start;
if ( len<4 ) {
// too short word found, find next one
// (This seems wrong and a no-op, as it looks like it will find
// the exact same word as the previous call...)
lStr_findWordBounds( m_text, m_length, end-1, start, end );
len = end-start;
}
#if TRACE_LINE_SPLITTING==1
if ( len>0 ) {
CRLog::trace("wordBounds(%s) unusedSpace=%d wordWidth=%d", LCSTR(lString16(m_text+start, len)), unusedSpace, m_widths[end]-m_widths[start]);
TR("wordBounds(%s) unusedSpace=%d wordWidth=%d", LCSTR(lString16(m_text+start, len)), unusedSpace, m_widths[end]-m_widths[start]);
}
#endif
if ( start<end && start<wordpos && end>=lastNormalWrap && len>=MIN_WORD_LEN_TO_HYPHENATE ) {
if ( len > MAX_WORD_SIZE )
if ( lastMandatoryWrap<0 && lastNormalWrap<m_length-1 && unusedPercent > 5 ) {
// There may be more than one word between wordpos and lastNormalWrap (or
// pos, the start of this line): if hyphenation is not possible with
// the rightmost one, we have to try the previous words.
// #define DEBUG_HYPH_EXTRA_LOOPS // Uncomment for debugging loops
#ifdef DEBUG_HYPH_EXTRA_LOOPS
int debug_loop_num = 0;
#endif
int wordpos_min = lastNormalWrap > pos ? lastNormalWrap : pos;
while ( wordpos > wordpos_min ) {
if ( m_srcs[wordpos]->flags & LTEXT_SRC_IS_OBJECT ) {
wordpos--; // skip images & floats
continue;
}
#ifdef DEBUG_HYPH_EXTRA_LOOPS
debug_loop_num++;
if (debug_loop_num > 1)
printf("hyph loop #%d checking: %s\n", debug_loop_num,
LCSTR(lString16(m_text+wordpos_min, i-wordpos_min+1)));
#endif
if ( !(m_srcs[wordpos]->flags & LTEXT_HYPHENATE) ) {
// The word at wordpos can't be hyphenated, but it might be
// allowed on some earlier word in another text node.
// As this is a rare situation (they are mostly all hyphenatable,
// or none of them are), and to skip some loops, as the min size
// of a word to go look for hyphenation is 4, skip by 4 chars.
wordpos = wordpos - MIN_WORD_LEN_TO_HYPHENATE;
continue;
}
// lStr_findWordBounds() will find the word contained at wordpos
// (or the previous word if wordpos happens to be a space or some
// punctuation) by looking only for alpha chars in m_text.
// Note: it actually does that with the char at wordpos-1 - not sure
// if we should correct it, here or there - or if this is fine - but
// let's go with it as-is as it might be a safety and might help
// us not be stuck in some infinite loop here.
int start, end;
lStr_findWordBounds( m_text, m_length, wordpos, start, end );
if ( end <= lastNormalWrap ) {
// We passed back lastNormalWrap: no need to look for more
break;
}
int len = end - start;
if ( len < MIN_WORD_LEN_TO_HYPHENATE ) {
// Too short word found, skip it
wordpos = start - 1;
continue;
}
if ( start >= wordpos ) {
// Shouldn't happen, but let's be sure we don't get stuck
wordpos = wordpos - MIN_WORD_LEN_TO_HYPHENATE;
continue;
}
#ifdef DEBUG_HYPH_EXTRA_LOOPS
if (debug_loop_num > 1)
printf(" hyphenating: %s\n", LCSTR(lString16(m_text+start, len)));
#endif
#if TRACE_LINE_SPLITTING==1
TR("wordBounds(%s) unusedSpace=%d wordWidth=%d",
LCSTR(lString16(m_text+start, len)), unusedSpace, m_widths[end]-m_widths[start]);
#endif
// We have a valid word to look for hyphenation
if ( len > MAX_WORD_SIZE ) // hyphenate() stops/truncates at 64 chars
len = MAX_WORD_SIZE;
// HyphMan::hyphenate(), which is used by some other parts of the code,
// expects a lUInt8 array. We added flagSize=1|2 so it can set the correct
// flags on our upgraded (from lUInt8 to lUInt16) m_flags.
lUInt8 * flags = (lUInt8*) (m_flags + start);
// Fill static array with cumulative widths relative to word start
static lUInt16 widths[MAX_WORD_SIZE];
int wordStart_w = start>0 ? m_widths[start-1] : 0;
for ( int i=0; i<len; i++ ) {
widths[i] = m_widths[start+i] - wordStart_w;
}
int max_width = maxWidth + spaceReduceWidth - x - (wordStart_w - w0) - firstCharMargin;
int _hyphen_width = ((LVFont*)m_srcs[wordpos]->t.font)->getHyphenWidth();
// In some rare cases, a word here can be made with parts from multiple text nodes.
// Use the font of the text node at start to compute the hyphen width, which
// might then be wrong - but that will be smoothed by alignLine()
int _hyphen_width = ((LVFont*)m_srcs[start]->t.font)->getHyphenWidth();
if ( HyphMan::hyphenate(m_text+start, len, widths, flags, _hyphen_width, max_width, 2) ) {
for ( int i=0; i<len; i++ )
if ( (m_flags[start+i] & LCHAR_ALLOW_HYPH_WRAP_AFTER)!=0 ) {
if ( widths[i]+_hyphen_width>max_width ) {
for ( int i=0; i<len; i++ ) {
if ( m_flags[start+i] & LCHAR_ALLOW_HYPH_WRAP_AFTER ) {
if ( widths[i] + _hyphen_width > max_width ) {
TR("hyphen found, but max width reached at char %d", i);
break; // hyph is too late
}
if ( start + i > pos+1 )
if ( start + i > pos+1 ) {
lastHyphWrap = start + i;
// Keep looking for some other candidates in that word
}
}
} else {
TR("no hyphen found - max_width=%d", max_width);
}
if ( lastHyphWrap >= 0 ) {
// Found in this word, no need to look at previous words
break;
}
}
TR("no hyphen found - max_width=%d", max_width);
// Look at previous words if any
wordpos = start - 1;
}
}

// Find best position to end this line
int wrapPos = lastHyphWrap;
if ( lastMandatoryWrap>=0 )
Expand Down
36 changes: 34 additions & 2 deletions crengine/src/lvtinydom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3517,10 +3517,10 @@ static void writeNode( LVStream * stream, ldomNode * node, bool treeLayout )
}

// Extended version of previous function for displaying selection HTML, with tunable output
#define WRITENODEEX_TEXT_UNESCAPED 0x0001 ///< let &, < and > unescaped in text nodes (makes HTML invalid)
#define WRITENODEEX_ADD_UPPER_DIR_LANG_ATTR 0x0001 ///< add dir= and lang= grabbed from upper nodes
#define WRITENODEEX_TEXT_MARK_NODE_BOUNDARIES 0x0002 ///< mark start and end of text nodes (useful when indented)
#define WRITENODEEX_TEXT_SHOW_UNICODE_CODEPOINT 0x0004 ///< show unicode codepoint after char
#define WRITENODEEX_UNUSED_2 0x0008 ///<
#define WRITENODEEX_TEXT_UNESCAPED 0x0008 ///< let &, < and > unescaped in text nodes (makes HTML invalid)
#define WRITENODEEX_INDENT_NEWLINE 0x0010 ///< indent newlines according to node level
#define WRITENODEEX_NEWLINE_BLOCK_NODES 0x0020 ///< start only nodes rendered as block/final on a new line,
/// so inline elements and text nodes are stuck together
Expand Down Expand Up @@ -3592,9 +3592,32 @@ static void writeNodeEx( LVStream * stream, ldomNode * node, lString16Collection
}

bool isInitialNode = false;
lString16 initialDirAttribute = lString16::empty_str;
lString16 initialLangAttribute = lString16::empty_str;
if (indentBaseLevel < 0) { // initial call (recursive ones will have it >=0)
indentBaseLevel = node->getNodeLevel();
isInitialNode = true;
if ( WNEFLAG(ADD_UPPER_DIR_LANG_ATTR) && !node->isRoot() ) {
// Grab any dir="rtl" and lang="ar_AA" attributes from some parent node
if ( !node->hasAttribute( attr_dir ) ) {
ldomNode *pnode = node->getParentNode();
for ( ; pnode && !pnode->isNull() && !pnode->isRoot(); pnode = pnode->getParentNode() ) {
if ( pnode->hasAttribute(attr_dir) ) {
initialDirAttribute = pnode->getAttributeValue(attr_dir);
break;
}
}
}
if ( !node->hasAttribute( attr_lang ) ) {
ldomNode *pnode = node->getParentNode();
for ( ; pnode && !pnode->isNull() && !pnode->isRoot(); pnode = pnode->getParentNode() ) {
if ( pnode->hasAttribute(attr_lang) ) {
initialLangAttribute = pnode->getAttributeValue(attr_lang);
break;
}
}
}
}
}
int level = node->getNodeLevel();
if ( node->isText() && isAfterStart && isBeforeEnd ) {
Expand Down Expand Up @@ -3782,6 +3805,15 @@ static void writeNodeEx( LVStream * stream, ldomNode * node, lString16Collection
if ( !elemNsName.empty() )
elemName = elemNsName + ":" + elemName;
*stream << "<" << elemName;
if ( isInitialNode ) {
// Add any dir="rtl" and lang="ar_AA" attributes grabbed from some parent node
if ( !initialDirAttribute.empty() ) {
*stream << " dir=\"" << UnicodeToUtf8(initialDirAttribute) << "\"";
}
if ( !initialLangAttribute.empty() ) {
*stream << " lang=\"" << UnicodeToUtf8(initialLangAttribute) << "\"";
}
}
for ( int i=0; i<(int)node->getAttrCount(); i++ ) {
const lxmlAttribute * attr = node->getAttribute(i);
if (attr) {
Expand Down