diff --git a/crengine/include/hyphman.h b/crengine/include/hyphman.h index 294ea859e..318e46e15 100644 --- a/crengine/include/hyphman.h +++ b/crengine/include/hyphman.h @@ -17,27 +17,45 @@ #include "lvtypes.h" #include "lvstream.h" - -class HyphMethod -{ -public: - virtual bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth, size_t flagSize=1 ) = 0; - virtual ~HyphMethod() { } -}; - +#include "lvhashtable.h" #define WORD_LENGTH 64 #define MAX_REAL_WORD 24 // min value supported by algorithms is 1 (max is arbitrary 10) -// value enforced by algorithm previously was 2, so it's the default -#define HYPH_DEFAULT_HYPHEN_MIN 2 -#define HYPH_MIN_HYPHEN_MIN 1 +// 0 means to use the defaults per HyphMethod +// if set to >= 1, the values apply to all HyphMethods +#define HYPH_MIN_HYPHEN_MIN 0 #define HYPH_MAX_HYPHEN_MIN 10 +// Default for global HyphMan values is 0: use per-HyphMethod defaults +#define HYPH_DEFAULT_HYPHEN_MIN 0 +// Default for per-HyphMethod values (value enforced by algorithms +// previously was 2, so let's keep that as the default) +#define HYPHMETHOD_DEFAULT_HYPHEN_MIN 2 // Don't trust soft-hyphens when using dict or algo methods #define HYPH_DEFAULT_TRUST_SOFT_HYPHENS 0 +class HyphMethod +{ +protected: + lString16 _id; + int _left_hyphen_min; + int _right_hyphen_min; +public: + HyphMethod(lString16 id, int leftHyphenMin=HYPHMETHOD_DEFAULT_HYPHEN_MIN, int rightHyphenMin=HYPHMETHOD_DEFAULT_HYPHEN_MIN) + : _id(id) + , _left_hyphen_min(leftHyphenMin) + , _right_hyphen_min(rightHyphenMin) + { } + lString16 getId() { return _id; } + virtual bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth, size_t flagSize=1 ) = 0; + virtual ~HyphMethod() { } + virtual lUInt32 getCount() { return 0; } + virtual lUInt32 getSize() { return 0; } +}; + + enum HyphDictType { HDT_NONE, // disable hyphenation @@ -70,7 +88,6 @@ class HyphDictionary 
#define HYPH_DICT_ID_SOFTHYPHENS L"@softhyphens" #define HYPH_DICT_ID_DICTIONARY L"@dictionary" - class HyphDictionaryList { LVPtrVector _list; @@ -86,6 +103,11 @@ class HyphDictionaryList }; #define DEF_HYPHENATION_DICT "English_US.pattern" +// We'll be loading English_US.pattern even if non-english users +// may never use it, but it's a bit tedious not going with it. +// It might use around 1M of memory, but it will avoid re-rendering +// the document if the book does not contain any language tag, and +// we end up going with it anyway. class HyphDictionary; class HyphDictionaryList; @@ -100,19 +122,21 @@ class HyphMan friend class TexHyph; friend class AlgoHyph; friend class SoftHyphensHyph; - static HyphMethod * _method; - static HyphDictionary * _selectedDictionary; - static HyphDictionaryList * _dictList; + // Obsolete: now fetched from TextLangMan main lang TextLangCfg + // static HyphMethod * _method; + // static HyphDictionary * _selectedDictionary; + static HyphDictionaryList * _dictList; // available hyph dict files (+ none/algo/softhyphens) + static LVHashTable _loaded_hyph_methods; // methods with loaded dictionaries static int _LeftHyphenMin; static int _RightHyphenMin; static int _TrustSoftHyphens; public: static void uninit(); - static bool activateDictionaryFromStream( LVStreamRef stream ); + static bool initDictionaries(lString16 dir, bool clear = true); static HyphDictionaryList * getDictList() { return _dictList; } static bool activateDictionary( lString16 id ) { return _dictList->activate(id); } - static bool initDictionaries(lString16 dir, bool clear = true); - static HyphDictionary * getSelectedDictionary() { return _selectedDictionary; } + static bool activateDictionaryFromStream( LVStreamRef stream ); // used by CoolReader on Android + static HyphDictionary * getSelectedDictionary(); // was: { return _selectedDictionary; } static int getLeftHyphenMin() { return _LeftHyphenMin; } static int getRightHyphenMin() { return _RightHyphenMin; } 
static bool setLeftHyphenMin( int left_hyphen_min ); @@ -120,16 +144,19 @@ class HyphMan static int getTrustSoftHyphens() { return _TrustSoftHyphens; } static bool setTrustSoftHyphens( int trust_soft_hyphen ); static bool isEnabled(); + static HyphMethod * getHyphMethodForDictionary( lString16 id, int leftHyphenMin=HYPHMETHOD_DEFAULT_HYPHEN_MIN, + int rightHyphenMin=HYPHMETHOD_DEFAULT_HYPHEN_MIN ); HyphMan(); ~HyphMan(); + static bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth, size_t flagSize=1 ); + /* Obsolete: inline static bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth, size_t flagSize=1 ) { return _method->hyphenate( str, len, widths, flags, hyphCharWidth, maxWidth, flagSize ); } + */ }; - - #endif diff --git a/crengine/include/lvdocviewprops.h b/crengine/include/lvdocviewprops.h index 4d2fd8bdf..6c91d0f54 100644 --- a/crengine/include/lvdocviewprops.h +++ b/crengine/include/lvdocviewprops.h @@ -48,13 +48,22 @@ #define PROP_SHOW_BATTERY_PERCENT "window.status.battery.percent" //#define PROP_FONT_KERNING_ENABLED "font.kerning.enabled" #define PROP_LANDSCAPE_PAGES "window.landscape.pages" -#define PROP_HYPHENATION_LEFT_HYPHEN_MIN "crengine.hyphenation.left.hyphen.min" -#define PROP_HYPHENATION_RIGHT_HYPHEN_MIN "crengine.hyphenation.right.hyphen.min" -#define PROP_HYPHENATION_TRUST_SOFT_HYPHENS "crengine.hyphenation.trust.soft.hyphens" +#define PROP_AUTOSAVE_BOOKMARKS "crengine.autosave.bookmarks" + +// Obsolete hyph settings: #define PROP_HYPHENATION_DICT "crengine.hyphenation.directory" #define PROP_HYPHENATION_DICT_VALUE_NONE "@none" #define PROP_HYPHENATION_DICT_VALUE_ALGORITHM "@algorithm" -#define PROP_AUTOSAVE_BOOKMARKS "crengine.autosave.bookmarks" +// Still used hyph settings: +#define PROP_HYPHENATION_LEFT_HYPHEN_MIN "crengine.hyphenation.left.hyphen.min" +#define PROP_HYPHENATION_RIGHT_HYPHEN_MIN 
"crengine.hyphenation.right.hyphen.min" +#define PROP_HYPHENATION_TRUST_SOFT_HYPHENS "crengine.hyphenation.trust.soft.hyphens" +// New textlang typography settings: +#define PROP_TEXTLANG_MAIN_LANG "crengine.textlang.main.lang" +#define PROP_TEXTLANG_EMBEDDED_LANGS_ENABLED "crengine.textlang.embedded.langs.enabled" +#define PROP_TEXTLANG_HYPHENATION_ENABLED "crengine.textlang.hyphenation.enabled" +#define PROP_TEXTLANG_HYPH_SOFT_HYPHENS_ONLY "crengine.textlang.hyphenation.soft.hyphens.only" +#define PROP_TEXTLANG_HYPH_FORCE_ALGORITHMIC "crengine.textlang.hyphenation.force.algorithmic" #define PROP_FLOATING_PUNCTUATION "crengine.style.floating.punctuation.enabled" diff --git a/crengine/include/lvfntman.h b/crengine/include/lvfntman.h index 2f90e3e45..25348de4c 100644 --- a/crengine/include/lvfntman.h +++ b/crengine/include/lvfntman.h @@ -24,6 +24,7 @@ #include "lvptrvec.h" #include "hyphman.h" #include "lvdrawbuf.h" +#include "textlang.h" #if !defined(__SYMBIAN32__) && defined(_WIN32) extern "C" { @@ -340,6 +341,7 @@ class LVFont : public LVRefCounter lUInt8 * flags, int max_width, lChar16 def_char, + TextLangCfg * lang_cfg=NULL, int letter_spacing=0, bool allow_hyphenation=true, lUInt32 hints=0 @@ -350,7 +352,7 @@ class LVFont : public LVRefCounter \param len is number of characters to measure \return width of specified string */ - virtual lUInt32 getTextWidth( const lChar16 * text, int len ) = 0; + virtual lUInt32 getTextWidth( const lChar16 * text, int len, TextLangCfg * lang_cfg=NULL ) = 0; // /** \brief get glyph image in 1 byte per pixel format // \param code is unicode character @@ -391,6 +393,7 @@ class LVFont : public LVRefCounter virtual int DrawTextString( LVDrawBuf * buf, int x, int y, const lChar16 * text, int len, lChar16 def_char, lUInt32 * palette = NULL, bool addHyphen = false, + TextLangCfg * lang_cfg=NULL, lUInt32 flags=0, int letter_spacing=0, int width=-1, int text_decoration_back_gap=0 ) = 0; /// constructor @@ -576,6 +579,7 @@ class LVBaseFont 
: public LVFont virtual int DrawTextString( LVDrawBuf * buf, int x, int y, const lChar16 * text, int len, lChar16 def_char, lUInt32 * palette, bool addHyphen, + TextLangCfg * lang_cfg=NULL, lUInt32 flags=0, int letter_spacing=0, int width=-1, int text_decoration_back_gap=0 ); }; @@ -595,6 +599,7 @@ class LBitmapFont : public LVBaseFont lUInt8 * flags, int max_width, lChar16 def_char, + TextLangCfg * lang_cfg=NULL, int letter_spacing=0, bool allow_hyphenation=true, lUInt32 hints=0 @@ -605,7 +610,7 @@ class LBitmapFont : public LVBaseFont \return width of specified string */ virtual lUInt32 getTextWidth( - const lChar16 * text, int len + const lChar16 * text, int len, TextLangCfg * lang_cfg=NULL ); virtual LVFontGlyphCacheItem * getGlyph(lUInt32 ch, lChar16 def_char=0); /// returns font baseline offset @@ -757,6 +762,7 @@ class LVWin32DrawFont : public LVBaseWin32Font lUInt8 * flags, int max_width, lChar16 def_char, + TextLangCfg * lang_cfg=NULL, int letter_spacing=0, bool allow_hyphenation=true, lUInt32 hints=0 @@ -767,7 +773,7 @@ class LVWin32DrawFont : public LVBaseWin32Font \return width of specified string */ virtual lUInt32 getTextWidth( - const lChar16 * text, int len + const lChar16 * text, int len, TextLangCfg * lang_cfg=NULL ); /// returns char width @@ -777,6 +783,7 @@ class LVWin32DrawFont : public LVBaseWin32Font virtual int DrawTextString( LVDrawBuf * buf, int x, int y, const lChar16 * text, int len, lChar16 def_char, lUInt32 * palette, bool addHyphen, + TextLangCfg * lang_cfg=NULL, lUInt32 flags=0, int letter_spacing=0, int width=-1, int text_decoration_back_gap=0 ); @@ -935,6 +942,7 @@ class LVWin32Font : public LVBaseWin32Font lUInt8 * flags, int max_width, lChar16 def_char, + TextLangCfg * lang_cfg=NULL, int letter_spacing=0, bool allow_hyphenation=true, lUInt32 hints=0 @@ -945,7 +953,7 @@ class LVWin32Font : public LVBaseWin32Font \return width of specified string */ virtual lUInt32 getTextWidth( - const lChar16 * text, int len + const lChar16 * 
text, int len, TextLangCfg * lang_cfg=NULL ); /** \brief get glyph image in 1 byte per pixel format diff --git a/crengine/include/lvrend.h b/crengine/include/lvrend.h index b0bfc4843..f21e3c5fd 100644 --- a/crengine/include/lvrend.h +++ b/crengine/include/lvrend.h @@ -15,6 +15,7 @@ #define __LV_REND_H_INCLUDED__ #include "lvtinydom.h" +#include "textlang.h" // Current direction, from dir="ltr" or dir="rtl" element attribute // Should map directly to the RENDER_RECT_FLAG_DIRECTION_* below @@ -120,7 +121,7 @@ int initRendMethod( ldomNode * node, bool recurseChildren, bool allowAutoboxing int styleToTextFmtFlags( const css_style_ref_t & style, int oldflags, int direction=REND_DIRECTION_UNSET ); /// renders block as single text formatter object void renderFinalBlock( ldomNode * node, LFormattedText * txform, RenderRectAccessor * fmt, int & flags, - int indent, int line_h, int valign_dy=0, bool * is_link_start=NULL ); + int indent, int line_h, TextLangCfg * lang_cfg=NULL, int valign_dy=0, bool * is_link_start=NULL ); /// renders block which contains subblocks (with gRenderBlockRenderingFlags as flags) int renderBlockElement( LVRendPageContext & context, ldomNode * enode, int x, int y, int width, int direction=REND_DIRECTION_UNSET, int * baseline=NULL ); /// renders block which contains subblocks @@ -144,7 +145,8 @@ void DrawDocument( LVDrawBuf & drawbuf, ldomNode * node, int x0, int y0, int dx, // minWidth: width with a wrap on all spaces (no hyphenation), so width taken by the longest word // full function for recursive use: void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direction, bool ignorePadding, int rendFlags, - int &curMaxWidth, int &curWordWidth, bool &collapseNextSpace, int &lastSpaceWidth, int indent, bool isStartNode=false); + int &curMaxWidth, int &curWordWidth, bool &collapseNextSpace, int &lastSpaceWidth, + int indent, TextLangCfg * lang_cfg, bool isStartNode=false); // simpler function for first call: void 
getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direction=REND_DIRECTION_UNSET, bool ignorePadding=false, int rendFlags=0); diff --git a/crengine/include/lvstyles.h b/crengine/include/lvstyles.h index 8d0731e38..b2e9e7837 100644 --- a/crengine/include/lvstyles.h +++ b/crengine/include/lvstyles.h @@ -315,8 +315,8 @@ class lvdomElementFormatRec { int _top_overflow; // Overflow (positive value) below _y int _bottom_overflow; // Overflow (positive value) after _y+_height - int _listprop_node_idx; // dataIndex of the UL/OL node this erm_final block - // should get its marker from + int _lang_node_idx; // dataIndex of the upper node this erm_final block + // should get its lang= language from // Flags & extras, to have additional info related to this rect cached. // - For erm_final nodes, these contain the footprint of outer floats @@ -332,18 +332,21 @@ class lvdomElementFormatRec { int _extra4; int _extra5; - // Added for padding from 14 to 16 32-bits ints + int _listprop_node_idx; // dataIndex of the UL/OL node this erm_final block + // should get its marker from + + // Added for padding from 15 to 16 32-bits ints int _available1; - int _available2; public: lvdomElementFormatRec() : _x(0), _width(0), _y(0), _height(0) , _inner_width(0), _inner_x(0), _inner_y(0), _baseline(0) - , _top_overflow(0), _bottom_overflow(0), _listprop_node_idx(0) + , _top_overflow(0), _bottom_overflow(0) + , _lang_node_idx(0) , _listprop_node_idx(0) , _flags(0), _extra0(0) , _extra1(0), _extra2(0), _extra3(0), _extra4(0), _extra5(0) - , _available1(0), _available2(0) + , _available1(0) { } ~lvdomElementFormatRec() @@ -354,10 +357,10 @@ class lvdomElementFormatRec { _x = _width = _y = _height = 0; _inner_width = _inner_x = _inner_y = _baseline = 0; _top_overflow = _bottom_overflow = 0; - _listprop_node_idx = 0; + _lang_node_idx = _listprop_node_idx = 0; _flags = _extra0 = 0; _extra1 = _extra2 = _extra3 = _extra4 = _extra5 = 0; - _available1 = 0; _available2 = 0; +
_available1 = 0; } bool operator == ( lvdomElementFormatRec & v ) { @@ -365,11 +368,11 @@ class lvdomElementFormatRec { _inner_width==v._inner_width && _inner_x==v._inner_x && _inner_y==v._inner_y && _baseline==v._baseline && _top_overflow==v._top_overflow && _bottom_overflow==v._bottom_overflow && - _listprop_node_idx==v._listprop_node_idx && + _lang_node_idx==v._lang_node_idx && _listprop_node_idx==v._listprop_node_idx && _flags==v._flags && _extra0==v._extra0 && _extra1==v._extra1 && _extra2==v._extra2 && _extra3==v._extra3 && _extra4==v._extra4 && _extra5==v._extra5 && - _available1==v._available1 && _available2==v._available2 + _available1==v._available1 ); } bool operator != ( lvdomElementFormatRec & v ) @@ -378,11 +381,11 @@ class lvdomElementFormatRec { _inner_width!=v._inner_width || _inner_x!=v._inner_x || _inner_y!=v._inner_y || _baseline!=v._baseline || _top_overflow!=v._top_overflow || _bottom_overflow!=v._bottom_overflow || - _listprop_node_idx!=v._listprop_node_idx || + _lang_node_idx!=v._lang_node_idx || _listprop_node_idx!=v._listprop_node_idx || _flags!=v._flags || _extra0!=v._extra0 || _extra1!=v._extra1 || _extra2!=v._extra2 || _extra3!=v._extra3 || _extra4!=v._extra4 || _extra5!=v._extra5 || - _available1!=v._available1 || _available2!=v._available2 + _available1!=v._available1 ); } // Get/Set diff --git a/crengine/include/lvtextfm.h b/crengine/include/lvtextfm.h index a9454ec0b..7211dedcf 100755 --- a/crengine/include/lvtextfm.h +++ b/crengine/include/lvtextfm.h @@ -17,6 +17,7 @@ #include "lvfntman.h" #include "lvbmpbuf.h" +#include "textlang.h" // comment out following line to use old formatter #define USE_NEW_FORMATTER 1 @@ -82,6 +83,7 @@ extern "C" { typedef struct { void * object; /**< \brief pointer to object which represents source */ + TextLangCfg * lang_cfg; lInt16 indent; /**< \brief first line indent (or all but first, when negative) */ lInt16 valign_dy; /* drift y from baseline */ lInt16 interval; /**< \brief line height in screen 
pixels */ @@ -287,6 +289,7 @@ void lvtextFreeFormatter( formatted_text_fragment_t * pbuffer ); void lvtextAddSourceLine( formatted_text_fragment_t * pbuffer, lvfont_handle font, /* handle of font to draw string */ + TextLangCfg * lang_cfg, const lChar16 * text, /* pointer to unicode text string */ lUInt32 len, /* number of chars in text, 0 for auto(strlen) */ lUInt32 color, /* text color */ @@ -306,6 +309,7 @@ void lvtextAddSourceLine( */ void lvtextAddSourceObject( formatted_text_fragment_t * pbuffer, + TextLangCfg * lang_cfg, lInt16 width, lInt16 height, lUInt32 flags, /* flags */ @@ -367,6 +371,7 @@ class LFormattedText lInt16 valign_dy, /* drift y from baseline */ lInt16 indent, /* first line indent (or all but first, when negative) */ void * object, /* pointer to custom object */ + TextLangCfg * lang_cfg, lInt16 letter_spacing=0 ); @@ -375,7 +380,8 @@ class LFormattedText lUInt32 len, /* number of chars in text, 0 for auto(strlen) */ lUInt32 color, /* text color */ lUInt32 bgcolor, /* background color */ - LVFont * font, /* font to draw string */ + LVFont * font, /* font to draw string */ + TextLangCfg * lang_cfg, lUInt32 flags, /* (had default =LTEXT_ALIGN_LEFT|LTEXT_FLAG_OWNTEXT) */ lInt16 interval, /* line height in screen pixels */ lInt16 valign_dy=0, /* drift y from baseline */ @@ -387,6 +393,7 @@ class LFormattedText { lvtextAddSourceLine(m_pbuffer, font, //font->GetHandle() + lang_cfg, text, len, color, bgcolor, flags, interval, valign_dy, indent, object, (lUInt16)offset, letter_spacing ); } diff --git a/crengine/include/lvtinydom.h b/crengine/include/lvtinydom.h index 03956899c..b9bf950ad 100755 --- a/crengine/include/lvtinydom.h +++ b/crengine/include/lvtinydom.h @@ -704,6 +704,8 @@ class RenderRectAccessor : public lvdomElementFormatRec void setBaseline( int baseline ); int getListPropNodeIndex(); void setListPropNodeIndex( int idx ); + int getLangNodeIndex(); + void setLangNodeIndex( int idx ); unsigned short getFlags(); void setFlags( unsigned 
short flags ); diff --git a/crengine/include/lvxml.h b/crengine/include/lvxml.h index db40a02e4..842f3990e 100644 --- a/crengine/include/lvxml.h +++ b/crengine/include/lvxml.h @@ -442,4 +442,48 @@ lString16 LVReadTextFile( lString16 filename ); LVStreamRef GetFB2Coverpage(LVStreamRef stream); +#define BASE64_BUF_SIZE 128 +class LVBase64Stream : public LVNamedStream +{ +private: + lString8 m_curr_text; + int m_text_pos; + lvsize_t m_size; + lvpos_t m_pos; + int m_iteration; + lUInt32 m_value; + lUInt8 m_bytes[BASE64_BUF_SIZE]; + int m_bytes_count; + int m_bytes_pos; + int readNextBytes(); + int bytesAvailable(); + bool rewind(); + bool skip( lvsize_t count ); +public: + virtual ~LVBase64Stream() { } + LVBase64Stream(lString8 data); + virtual lverror_t Seek(lvoffset_t offset, lvseek_origin_t origin, lvpos_t* newPos); + virtual lverror_t Read(void* buf, lvsize_t size, lvsize_t* pBytesRead); + virtual bool Eof() { + return m_pos >= m_size; + } + virtual lvsize_t GetSize() { + return m_size; + } + virtual lvpos_t GetPos() { + return m_pos; + } + virtual lverror_t GetPos( lvpos_t * pos ) { + if (pos) + *pos = m_pos; + return LVERR_OK; + } + virtual lverror_t Write(const void*, lvsize_t, lvsize_t*) { + return LVERR_NOTIMPL; + } + virtual lverror_t SetSize(lvsize_t) { + return LVERR_NOTIMPL; + } +}; + #endif // __LVXML_H_INCLUDED__ diff --git a/crengine/include/textlang.h b/crengine/include/textlang.h new file mode 100644 index 000000000..2a8b970f7 --- /dev/null +++ b/crengine/include/textlang.h @@ -0,0 +1,152 @@ +#ifndef __TEXTLANG_H_INCLUDED__ +#define __TEXTLANG_H_INCLUDED__ + +#if USE_HARFBUZZ==1 +#include +#include +#endif + +#if USE_LIBUNIBREAK==1 +#include + // linebreakdef.h is not wrapped by this, unlike linebreak.h + // (not wrapping results in "undefined symbol" with the original + // function name kinda obfuscated) + #ifdef __cplusplus + extern "C" { + #endif +#include + #ifdef __cplusplus + } + #endif +#endif + +// Be similar to HyphMan default state with 
"English_US.pattern" +#define TEXTLANG_DEFAULT_MAIN_LANG "en" // for LVDocView +#define TEXTLANG_DEFAULT_MAIN_LANG_16 L"en" // for textlang.cpp +#define TEXTLANG_DEFAULT_EMBEDDED_LANGS_ENABLED false +#define TEXTLANG_DEFAULT_HYPHENATION_ENABLED true +#define TEXTLANG_DEFAULT_HYPH_SOFT_HYPHENS_ONLY false +#define TEXTLANG_DEFAULT_HYPH_FORCE_ALGORITHMIC false +#define TEXTLANG_FALLBACK_HYPH_DICT_ID L"English_US.pattern" // For languages without specific hyph dicts + +class TextLangCfg; + +class TextLangMan +{ + friend TextLangCfg; + static lString16 _main_lang; + static bool _embedded_langs_enabled; + static LVPtrVector _lang_cfg_list; + + static bool _overridden_hyph_method; // (to avoid checking the 3 following bool) + static bool _hyphenation_enabled; + static bool _hyphenation_soft_hyphens_only; + static bool _hyphenation_force_algorithmic; + static HyphMethod * _no_hyph_method; // instance of hyphman NoHyph + static HyphMethod * _soft_hyphens_method; // instance of hyphman SoftHyphensHyph + static HyphMethod * _algo_hyph_method; // instance of hyphman AlgoHyph + + static HyphMethod * getHyphMethodForLang( lString16 lang_tag ); // Used by TextLangCfg +public: + static void uninit(); + static lUInt32 getHash(); + + static void setMainLang( lString16 lang_tag ) { _main_lang = lang_tag; } + static void setMainLangFromHyphDict( lString16 id ); // For HyphMan legacy methods + static lString16 getMainLang() { return _main_lang; } + + static void setEmbeddedLangsEnabled( bool enabled ) { _embedded_langs_enabled = enabled; } + static bool getEmbeddedLangsEnabled() { return _embedded_langs_enabled; } + + static bool getHyphenationEnabled() { return _hyphenation_enabled; } + static void setHyphenationEnabled( bool enabled ) { + _hyphenation_enabled = enabled; + _overridden_hyph_method = !_hyphenation_enabled || _hyphenation_soft_hyphens_only || _hyphenation_force_algorithmic; + } + + static bool getHyphenationSoftHyphensOnly() { return _hyphenation_soft_hyphens_only; } + 
static void setHyphenationSoftHyphensOnly( bool enabled ) { + _hyphenation_soft_hyphens_only = enabled; + _overridden_hyph_method = !_hyphenation_enabled || _hyphenation_soft_hyphens_only || _hyphenation_force_algorithmic; + } + + static bool getHyphenationForceAlgorithmic() { return _hyphenation_force_algorithmic; } + static void setHyphenationForceAlgorithmic( bool enabled ) { + _hyphenation_force_algorithmic = enabled; + _overridden_hyph_method = !_hyphenation_enabled || _hyphenation_soft_hyphens_only || _hyphenation_force_algorithmic; + } + + static TextLangCfg * getTextLangCfg(); // get LangCfg for _main_lang + static TextLangCfg * getTextLangCfg( lString16 lang_tag ); + static TextLangCfg * getTextLangCfg( ldomNode * node ); + static int getLangNodeIndex( ldomNode * node ); + + static HyphMethod * getMainLangHyphMethod(); // For HyphMan::hyphenate() + + // For frontend info about TextLangMan status and seen langs + static LVPtrVector * getLangCfgList() { + return &_lang_cfg_list; + } + + TextLangMan(); + ~TextLangMan(); +}; + +#define MAX_NB_LB_PROPS_ITEMS 10 // for our statically sized array (increase if needed) + +typedef lChar16 (*lb_char_sub_func_t)(const lChar16 * text, int pos, int next_usable); + +class TextLangCfg +{ + friend TextLangMan; + lString16 _lang_tag; + HyphMethod * _hyph_method; + + #if USE_HARFBUZZ==1 + hb_language_t _hb_language; + #endif + + #if USE_LIBUNIBREAK==1 + lb_char_sub_func_t _lb_char_sub_func; + struct LineBreakProperties _lb_props[MAX_NB_LB_PROPS_ITEMS]; + #endif + + bool _duplicate_real_hyphen_on_next_line; + +public: + lString16 getLangTag() const { return _lang_tag; } + + HyphMethod * getHyphMethod() const { + if ( !TextLangMan::_overridden_hyph_method ) + return _hyph_method; + if ( !TextLangMan::_hyphenation_enabled ) + return TextLangMan::_no_hyph_method; + if ( TextLangMan::_hyphenation_soft_hyphens_only ) + return TextLangMan::_soft_hyphens_method; + if ( TextLangMan::_hyphenation_force_algorithmic ) + return 
TextLangMan::_algo_hyph_method; + // Should not be reached + return _hyph_method; + } + HyphMethod * getDefaultHyphMethod() const { + return _hyph_method; + } + + #if USE_HARFBUZZ==1 + hb_language_t getHBLanguage() const { return _hb_language; } + #endif + + #if USE_LIBUNIBREAK==1 + bool hasLBCharSubFunc() const { return _lb_char_sub_func != NULL; } + lb_char_sub_func_t getLBCharSubFunc() const { return _lb_char_sub_func; } + struct LineBreakProperties * getLBProps() const { return (struct LineBreakProperties *)_lb_props; } + #endif + + bool duplicateRealHyphenOnNextLine() const { return _duplicate_real_hyphen_on_next_line; } + + TextLangCfg( lString16 lang_tag ); + ~TextLangCfg(); +}; + + +#endif diff --git a/crengine/src/hyphman.cpp b/crengine/src/hyphman.cpp index 25f90cecf..e456c81b0 100755 --- a/crengine/src/hyphman.cpp +++ b/crengine/src/hyphman.cpp @@ -38,6 +38,7 @@ #include "../include/hyphman.h" #include "../include/lvfnt.h" #include "../include/lvstring.h" +#include "../include/textlang.h" #ifdef ANDROID @@ -53,8 +54,10 @@ int HyphMan::_LeftHyphenMin = HYPH_DEFAULT_HYPHEN_MIN; int HyphMan::_RightHyphenMin = HYPH_DEFAULT_HYPHEN_MIN; int HyphMan::_TrustSoftHyphens = HYPH_DEFAULT_TRUST_SOFT_HYPHENS; +LVHashTable HyphMan::_loaded_hyph_methods(16); -HyphDictionary * HyphMan::_selectedDictionary = NULL; +// Obsolete: now fetched from TextLangMan main lang TextLangCfg +// HyphDictionary * HyphMan::_selectedDictionary = NULL; HyphDictionaryList * HyphMan::_dictList = NULL; @@ -68,21 +71,25 @@ class TexHyph : public HyphMethod { TexPattern * table[PATTERN_HASH_SIZE]; lUInt32 _hash; + lUInt32 _pattern_count; public: int largest_overflowed_word; bool match( const lChar16 * str, char * mask ); virtual bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth, size_t flagSize ); void addPattern( TexPattern * pattern ); - TexHyph(); + TexHyph( lString16 id=HYPH_DICT_ID_DICTIONARY, int 
leftHyphenMin=HYPHMETHOD_DEFAULT_HYPHEN_MIN, int rightHyphenMin=HYPHMETHOD_DEFAULT_HYPHEN_MIN ); virtual ~TexHyph(); bool load( LVStreamRef stream ); bool load( lString16 fileName ); virtual lUInt32 getHash() { return _hash; } + virtual lUInt32 getCount() { return _pattern_count; } + virtual lUInt32 getSize(); }; class AlgoHyph : public HyphMethod { public: + AlgoHyph(): HyphMethod(HYPH_DICT_ID_ALGORITHM) {}; virtual bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth, size_t flagSize ); virtual ~AlgoHyph(); }; @@ -90,6 +97,7 @@ class AlgoHyph : public HyphMethod class SoftHyphensHyph : public HyphMethod { public: + SoftHyphensHyph(): HyphMethod(HYPH_DICT_ID_SOFTHYPHENS) {}; virtual bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth, size_t flagSize ); virtual ~SoftHyphensHyph(); }; @@ -97,6 +105,7 @@ class SoftHyphensHyph : public HyphMethod class NoHyph : public HyphMethod { public: + NoHyph(): HyphMethod(HYPH_DICT_ID_NONE) {}; virtual bool hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth, size_t flagSize ) { CR_UNUSED6(str, len, widths, flags, hyphCharWidth, maxWidth); @@ -109,7 +118,8 @@ static NoHyph NO_HYPH; static AlgoHyph ALGO_HYPH; static SoftHyphensHyph SOFTHYPHENS_HYPH; -HyphMethod * HyphMan::_method = &NO_HYPH; +// Obsolete: provided by TextLangMan main lang +// HyphMethod * HyphMan::_method = &NO_HYPH; #pragma pack(push, 1) typedef struct { @@ -132,26 +142,39 @@ typedef struct { void HyphMan::uninit() { - if ( _dictList ) - delete _dictList; + // Avoid existing frontend code to have to call it: + TextLangMan::uninit(); + // Clean up _loaded_hyph_methods + LVHashTable::iterator it = _loaded_hyph_methods.forwardIterator(); + LVHashTable::pair* pair; + while ((pair = it.next())) { + delete pair->value; + } + _loaded_hyph_methods.clear(); + if ( _dictList ) 
+ delete _dictList; _dictList = NULL; + /* Obsolete: _selectedDictionary = NULL; if ( HyphMan::_method != &ALGO_HYPH && HyphMan::_method != &NO_HYPH && HyphMan::_method != &SOFTHYPHENS_HYPH ) delete HyphMan::_method; _method = &NO_HYPH; + */ } bool HyphMan::activateDictionaryFromStream( LVStreamRef stream ) { if ( stream.isNull() ) return false; + /* Obsolete: CRLog::trace("remove old hyphenation method"); if ( HyphMan::_method != &NO_HYPH && HyphMan::_method != &ALGO_HYPH && HyphMan::_method != &SOFTHYPHENS_HYPH && HyphMan::_method ) { delete HyphMan::_method; HyphMan::_method = &NO_HYPH; } + */ CRLog::trace("creating new TexHyph method"); - TexHyph * method = new TexHyph(); + TexHyph * method = new TexHyph(HYPH_DICT_ID_DICTIONARY); CRLog::trace("loading from file"); if ( !method->load( stream ) ) { CRLog::error("HyphMan::activateDictionaryFromStream: Cannot open hyphenation dictionary from stream" ); @@ -161,14 +184,28 @@ bool HyphMan::activateDictionaryFromStream( LVStreamRef stream ) if (method->largest_overflowed_word) printf("CRE WARNING: hyph dict from stream: some hyphenation patterns were too long and have been ignored: increase MAX_PATTERN_SIZE from %d to %d\n", MAX_PATTERN_SIZE, method->largest_overflowed_word); CRLog::debug("Dictionary is loaded successfully. 
Activating."); + + // Replace any dict previously loaded from stream + HyphMethod * prev_method; + if ( _loaded_hyph_methods.get(HYPH_DICT_ID_DICTIONARY, prev_method) ) { + delete prev_method; + _loaded_hyph_methods.remove(HYPH_DICT_ID_DICTIONARY); + } + _loaded_hyph_methods.set(HYPH_DICT_ID_DICTIONARY, method); + if (!_dictList) _dictList = new HyphDictionaryList(); + /* Obsolete: HyphMan::_method = method; + */ if ( HyphMan::_dictList->find(lString16(HYPH_DICT_ID_DICTIONARY))==NULL ) { HyphDictionary * dict = new HyphDictionary( HDT_DICT_ALAN, cs16("Dictionary"), lString16(HYPH_DICT_ID_DICTIONARY), lString16::empty_str ); HyphMan::_dictList->add(dict); + /* Obsolete: HyphMan::_selectedDictionary = dict; + */ } + TextLangMan::setMainLangFromHyphDict( HYPH_DICT_ID_DICTIONARY ); CRLog::trace("Activation is done"); return true; } @@ -211,11 +248,67 @@ bool HyphMan::setTrustSoftHyphens( int trust_soft_hyphens ) { } bool HyphMan::isEnabled() { + return TextLangMan::getHyphenationEnabled(); + /* Obsolete: return _selectedDictionary != NULL && _selectedDictionary->getId() != HYPH_DICT_ID_NONE; + */ +} + +bool HyphMan::hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 * flags, lUInt16 hyphCharWidth, lUInt16 maxWidth, size_t flagSize ) +{ + return TextLangMan::getMainLangHyphMethod()->hyphenate( str, len, widths, flags, hyphCharWidth, maxWidth, flagSize ); + /* Obsolete: + return _method->hyphenate( str, len, widths, flags, hyphCharWidth, maxWidth, flagSize ); + */ +} + +HyphDictionary * HyphMan::getSelectedDictionary() { + lString16 id = TextLangMan::getTextLangCfg()->getHyphMethod()->getId(); + HyphDictionary * dict = _dictList->find( id ); + return dict; +} + +HyphMethod * HyphMan::getHyphMethodForDictionary( lString16 id, int leftHyphenMin, int rightHyphenMin ) { + if ( id.empty() ) + return &NO_HYPH; + HyphDictionary * p = _dictList->find(id); + if ( !p || p->getType() == HDT_NONE ) + return &NO_HYPH; + if ( p->getType() == HDT_ALGORITHM ) + return
&ALGO_HYPH; + if ( p->getType() == HDT_SOFTHYPHENS ) + return &SOFTHYPHENS_HYPH; + if ( p->getType() != HDT_DICT_ALAN && p->getType() != HDT_DICT_TEX ) + return &NO_HYPH; + HyphMethod * method; + if ( _loaded_hyph_methods.get(id, method) ) { + // printf("getHyphMethodForDictionary reusing cached %s\n", UnicodeToUtf8(p->getFilename()).c_str()); + return method; + } + lString16 filename = p->getFilename(); + LVStreamRef stream = LVOpenFileStream( filename.c_str(), LVOM_READ ); + if ( stream.isNull() ) { + CRLog::error("Cannot open hyphenation dictionary %s", UnicodeToUtf8(filename).c_str() ); + return &NO_HYPH; + } + TexHyph * newmethod = new TexHyph(id, leftHyphenMin, rightHyphenMin); + if ( !newmethod->load( stream ) ) { + CRLog::error("Cannot open hyphenation dictionary %s", UnicodeToUtf8(filename).c_str() ); + delete newmethod; + return &NO_HYPH; + } + // printf("CRE: loaded hyphenation dict %s\n", UnicodeToUtf8(id).c_str()); + if ( newmethod->largest_overflowed_word ) + printf("CRE WARNING: %s: some hyphenation patterns were too long and have been ignored: increase MAX_PATTERN_SIZE from %d to %d\n", UnicodeToUtf8(filename).c_str(), MAX_PATTERN_SIZE, newmethod->largest_overflowed_word); + _loaded_hyph_methods.set(id, newmethod); + return newmethod; } bool HyphDictionary::activate() { + TextLangMan::setMainLangFromHyphDict( getId() ); + return true; + /* Obsolete: if (HyphMan::_selectedDictionary == this) return true; // already active if ( getType() == HDT_ALGORITHM ) { @@ -262,6 +355,7 @@ bool HyphDictionary::activate() } HyphMan::_selectedDictionary = this; return true; + */ } bool HyphDictionaryList::activate( lString16 id ) @@ -604,10 +698,11 @@ class HyphPatternReader : public LVXMLParserCallback }; -TexHyph::TexHyph() +TexHyph::TexHyph(lString16 id, int leftHyphenMin, int rightHyphenMin) : HyphMethod(id, leftHyphenMin, rightHyphenMin) { memset( table, 0, sizeof(table) ); _hash = 123456; + _pattern_count = 0; largest_overflowed_word = 0; } @@ -631,6 +726,11 
@@ void TexHyph::addPattern( TexPattern * pattern ) p = &((*p)->next); pattern->next = *p; *p = pattern; + _pattern_count++; +} + +lUInt32 TexHyph::getSize() { + return _pattern_count * sizeof(TexPattern); } bool TexHyph::load( LVStreamRef stream ) @@ -684,6 +784,7 @@ bool TexHyph::load( LVStreamRef stream ) CRLog::warn("Pattern overflowed (%d > %d) and ignored: '%s'", pattern->overflowed, MAX_PATTERN_SIZE, LCSTR(lString16(pattern->word))); if (pattern->overflowed > largest_overflowed_word) largest_overflowed_word = pattern->overflowed; + delete pattern; } else { addPattern( pattern ); @@ -721,6 +822,7 @@ bool TexHyph::load( LVStreamRef stream ) CRLog::warn("Pattern overflowed (%d > %d) and ignored: '%s'", pattern->overflowed, MAX_PATTERN_SIZE, LCSTR(lString16(pattern->word))); if (pattern->overflowed > largest_overflowed_word) largest_overflowed_word = pattern->overflowed; + delete pattern; } else { addPattern( pattern ); @@ -753,6 +855,7 @@ bool TexHyph::load( LVStreamRef stream ) CRLog::warn("Pattern overflowed (%d > %d) and ignored: (%s) '%s'", pattern->overflowed, MAX_PATTERN_SIZE, LCSTR(data[i]), LCSTR(lString16(pattern->word))); if (pattern->overflowed > largest_overflowed_word) largest_overflowed_word = pattern->overflowed; + delete pattern; } else { addPattern( pattern ); @@ -885,6 +988,11 @@ bool TexHyph::hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 CRLog::trace("Hyphenate: %s %s", LCSTR(buf), LCSTR(buf2) ); #endif + // Use HyphMan global left/right hyphen min, unless set to 0 (the default) + // which means we should use the HyphMethod specific values. + int left_hyphen_min = HyphMan::_LeftHyphenMin ? HyphMan::_LeftHyphenMin : _left_hyphen_min; + int right_hyphen_min = HyphMan::_RightHyphenMin ? 
HyphMan::_RightHyphenMin : _right_hyphen_min; + // Moves allowed hyphenation positions from 'mask' to the provided 'flags', // taking soft-hyphen shifts into account int soft_hyphens_skipped = 0; @@ -895,9 +1003,9 @@ bool TexHyph::hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 soft_hyphens_skipped++; continue; } - if (p-soft_hyphens_skipped < HyphMan::_LeftHyphenMin - 1) + if (p-soft_hyphens_skipped < left_hyphen_min - 1) continue; - if (p > len - HyphMan::_RightHyphenMin - 1) + if (p > len - right_hyphen_min - 1) continue; // hyphenate //00010030100 @@ -926,6 +1034,12 @@ bool AlgoHyph::hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 if ( softhyphens_hyphenate(str, len, widths, flags, hyphCharWidth, maxWidth, flagSize) ) return true; } + + // Use HyphMan global left/right hyphen min, unless set to 0 (the default) + // which means we should use the HyphMethod specific values. + int left_hyphen_min = HyphMan::_LeftHyphenMin ? HyphMan::_LeftHyphenMin : _left_hyphen_min; + int right_hyphen_min = HyphMan::_RightHyphenMin ? 
HyphMan::_RightHyphenMin : _right_hyphen_min; + lUInt16 chprops[WORD_LENGTH]; if ( len > WORD_LENGTH-2 ) len = WORD_LENGTH - 2; @@ -942,9 +1056,9 @@ bool AlgoHyph::hyphenate( const lChar16 * str, int len, lUInt16 * widths, lUInt8 // now look over word, placing hyphens if ( end-start > MIN_WORD_LEN_TO_HYPHEN ) { // word must be long enough for (i=start;i maxWidth ) break; diff --git a/crengine/src/lvdocview.cpp b/crengine/src/lvdocview.cpp index e0df5e683..fb2f528b3 100755 --- a/crengine/src/lvdocview.cpp +++ b/crengine/src/lvdocview.cpp @@ -19,6 +19,7 @@ #include "../include/lvstyles.h" #include "../include/lvrend.h" #include "../include/lvstsheet.h" +#include "../include/textlang.h" #include "../include/wolutil.h" #include "../include/crtxtenc.h" @@ -1059,14 +1060,14 @@ void LVDocView::drawCoverTo(LVDrawBuf * drawBuf, lvRect & rc) { LFormattedText txform; if (!authors.empty()) txform.AddSourceLine(authors.c_str(), authors.length(), 0xFFFFFFFF, - 0xFFFFFFFF, author_fnt.get(), LTEXT_ALIGN_CENTER, + 0xFFFFFFFF, author_fnt.get(), NULL, LTEXT_ALIGN_CENTER, author_fnt->getHeight() * 18 / 16); txform.AddSourceLine(title.c_str(), title.length(), 0xFFFFFFFF, 0xFFFFFFFF, - title_fnt.get(), LTEXT_ALIGN_CENTER, + title_fnt.get(), NULL, LTEXT_ALIGN_CENTER, title_fnt->getHeight() * 18 / 16); if (!series.empty()) txform.AddSourceLine(series.c_str(), series.length(), 0xFFFFFFFF, - 0xFFFFFFFF, series_fnt.get(), LTEXT_ALIGN_CENTER, + 0xFFFFFFFF, series_fnt.get(), NULL, LTEXT_ALIGN_CENTER, series_fnt->getHeight() * 18 / 16); int title_w = rc.width() - rc.width() / 4; int h = txform.Format((lUInt16)title_w, (lUInt16)rc.height()); @@ -6380,6 +6381,36 @@ CRPropRef LVDocView::propsApply(CRPropRef props) { REQUEST_RENDER("propsApply hyphenation trust_soft_hyphens") } #endif + } else if (name == PROP_TEXTLANG_MAIN_LANG) { + lString16 lang = props->getStringDef(PROP_TEXTLANG_MAIN_LANG, TEXTLANG_DEFAULT_MAIN_LANG); + if ( lang != TextLangMan::getMainLang() ) { + TextLangMan::setMainLang( 
lang ); + REQUEST_RENDER("propsApply textlang main_lang") + } + } else if (name == PROP_TEXTLANG_EMBEDDED_LANGS_ENABLED) { + bool enabled = props->getIntDef(PROP_TEXTLANG_EMBEDDED_LANGS_ENABLED, TEXTLANG_DEFAULT_EMBEDDED_LANGS_ENABLED); + if ( enabled != TextLangMan::getEmbeddedLangsEnabled() ) { + TextLangMan::setEmbeddedLangsEnabled( enabled ); + REQUEST_RENDER("propsApply textlang embedded_langs_enabled") + } + } else if (name == PROP_TEXTLANG_HYPHENATION_ENABLED) { + bool enabled = props->getIntDef(PROP_TEXTLANG_HYPHENATION_ENABLED, TEXTLANG_DEFAULT_HYPHENATION_ENABLED); + if ( enabled != TextLangMan::getHyphenationEnabled() ) { + TextLangMan::setHyphenationEnabled( enabled ); + REQUEST_RENDER("propsApply textlang hyphenation_enabled") + } + } else if (name == PROP_TEXTLANG_HYPH_SOFT_HYPHENS_ONLY) { + bool enabled = props->getIntDef(PROP_TEXTLANG_HYPH_SOFT_HYPHENS_ONLY, TEXTLANG_DEFAULT_HYPH_SOFT_HYPHENS_ONLY); + if ( enabled != TextLangMan::getHyphenationSoftHyphensOnly() ) { + TextLangMan::setHyphenationSoftHyphensOnly( enabled ); + REQUEST_RENDER("propsApply textlang hyphenation_soft_hyphens_only") + } + } else if (name == PROP_TEXTLANG_HYPH_FORCE_ALGORITHMIC) { + bool enabled = props->getIntDef(PROP_TEXTLANG_HYPH_FORCE_ALGORITHMIC, TEXTLANG_DEFAULT_HYPH_FORCE_ALGORITHMIC); + if ( enabled != TextLangMan::getHyphenationForceAlgorithmic() ) { + TextLangMan::setHyphenationForceAlgorithmic( enabled ); + REQUEST_RENDER("propsApply textlang hyphenation_force_algorithmic") + } } else if (name == PROP_INTERLINE_SPACE) { int interlineSpace = props->getIntDef(PROP_INTERLINE_SPACE, cr_interline_spaces[0]); diff --git a/crengine/src/lvfntman.cpp b/crengine/src/lvfntman.cpp index 13ade230b..061cc5675 100644 --- a/crengine/src/lvfntman.cpp +++ b/crengine/src/lvfntman.cpp @@ -1797,6 +1797,7 @@ class LVFreeTypeFace : public LVFont lUInt8 * flags, int max_width, lChar16 def_char, + TextLangCfg * lang_cfg = NULL, int letter_spacing = 0, bool allow_hyphenation = true, lUInt32 
hints=0 @@ -1897,6 +1898,9 @@ class LVFreeTypeFace : public LVFont hb_flags |= HB_BUFFER_FLAG_EOT; hb_buffer_set_flags(_hb_buffer, (hb_buffer_flags_t)hb_flags); } + if ( lang_cfg ) { + hb_buffer_set_language(_hb_buffer, lang_cfg->getHBLanguage()); + } // Let HB guess what's not been set (script, direction, language) hb_buffer_guess_segment_properties(_hb_buffer); @@ -2020,7 +2024,7 @@ class LVFreeTypeFace : public LVFont fb_hints &= ~LFNT_HINT_ENDS_PARAGRAPH; fallback->measureText( text + t_notdef_start, t_notdef_end - t_notdef_start, widths + t_notdef_start, flags + t_notdef_start, - max_width, def_char, letter_spacing, allow_hyphenation, + max_width, def_char, lang_cfg, letter_spacing, allow_hyphenation, fb_hints ); // Fix previous bad measurements int last_good_width = t_notdef_start > 0 ? widths[t_notdef_start-1] : 0; @@ -2114,7 +2118,7 @@ class LVFreeTypeFace : public LVFont int chars_measured = fallback->measureText( text + t_notdef_start, // start t_notdef_end - t_notdef_start, // len widths + t_notdef_start, flags + t_notdef_start, - max_width, def_char, letter_spacing, allow_hyphenation, + max_width, def_char, lang_cfg, letter_spacing, allow_hyphenation, fb_hints ); lastFitChar = t_notdef_start + chars_measured; int last_good_width = t_notdef_start > 0 ? widths[t_notdef_start-1] : 0; @@ -2292,7 +2296,10 @@ class LVFreeTypeFace : public LVFont lStr_findWordBounds( text, len, lastFitChar-1, hwStart, hwEnd ); if ( hwStart < (int)(lastFitChar-1) && hwEnd > hwStart+3 ) { //int maxw = max_width - (hwStart>0 ? 
widths[hwStart-1] : 0); - HyphMan::hyphenate(text+hwStart, hwEnd-hwStart, widths+hwStart, flags+hwStart, _hyphen_width, max_width); + if ( lang_cfg ) + lang_cfg->getHyphMethod()->hyphenate(text+hwStart, hwEnd-hwStart, widths+hwStart, flags+hwStart, _hyphen_width, max_width); + else // Use global lang hyph method + HyphMan::hyphenate(text+hwStart, hwEnd-hwStart, widths+hwStart, flags+hwStart, _hyphen_width, max_width); } } } @@ -2304,7 +2311,7 @@ class LVFreeTypeFace : public LVFont \param len is number of characters to measure \return width of specified string */ - virtual lUInt32 getTextWidth( const lChar16 * text, int len) { + virtual lUInt32 getTextWidth( const lChar16 * text, int len, TextLangCfg * lang_cfg=NULL) { static lUInt16 widths[MAX_LINE_CHARS+1]; static lUInt8 flags[MAX_LINE_CHARS+1]; if ( len>MAX_LINE_CHARS ) @@ -2317,7 +2324,7 @@ class LVFreeTypeFace : public LVFont flags, MAX_LINE_WIDTH, L' ', // def_char - 0 + lang_cfg ); if ( res>0 && resgetHBLanguage()); + } // Let HB guess what's not been set (script, direction, language) hb_buffer_guess_segment_properties(_hb_buffer); @@ -2815,7 +2826,7 @@ class LVFreeTypeFace : public LVFont // text decoration, that we dropped: no update needed) int fb_advance = fallback->DrawTextString( buf, x, fb_y, fb_text, fb_len, - def_char, palette, fb_addHyphen, fb_flags, letter_spacing, + def_char, palette, fb_addHyphen, lang_cfg, fb_flags, letter_spacing, width, text_decoration_back_gap ); x += fb_advance; #ifdef DEBUG_DRAW_TEXT @@ -3142,6 +3153,7 @@ class LVFontBoldTransform : public LVFont lUInt8 * flags, int max_width, lChar16 def_char, + TextLangCfg * lang_cfg = NULL, int letter_spacing=0, bool allow_hyphenation=true, lUInt32 hints=0 @@ -3154,6 +3166,7 @@ class LVFontBoldTransform : public LVFont flags, max_width, def_char, + lang_cfg, letter_spacing, allow_hyphenation, hints @@ -3171,7 +3184,7 @@ class LVFontBoldTransform : public LVFont \param len is number of characters to measure \return width of specified 
string */ - virtual lUInt32 getTextWidth( const lChar16 * text, int len) { + virtual lUInt32 getTextWidth( const lChar16 * text, int len, TextLangCfg * lang_cfg=NULL) { static lUInt16 widths[MAX_LINE_CHARS+1]; static lUInt8 flags[MAX_LINE_CHARS+1]; if ( len>MAX_LINE_CHARS ) @@ -3184,7 +3197,7 @@ class LVFontBoldTransform : public LVFont flags, MAX_LINE_WIDTH, L' ', // def_char - 0 + lang_cfg ); if ( res>0 && res0 && res0 && resgetHyphMethod()->hyphenate(text+hwStart, hwEnd-hwStart, widths+hwStart, flags+hwStart, _hyphen_width, max_width); + else // Use global lang hyph method + HyphMan::hyphenate(text+hwStart, hwEnd-hwStart, widths+hwStart, flags+hwStart, _hyphen_width, max_width); return nchars; } @@ -5718,6 +5739,7 @@ lUInt16 LVWin32DrawFont::measureText( int LVWin32DrawFont::DrawTextString( LVDrawBuf * buf, int x, int y, const lChar16 * text, int len, lChar16 def_char, lUInt32 * palette, bool addHyphen, + TextLangCfg * lang_cfg, lUInt32 flags, int letter_spacing, int width, int text_decoration_back_gap ) { @@ -5945,7 +5967,7 @@ bool LVWin32Font::getGlyphInfo( lUInt16 code, glyph_info_t * glyph, lChar16 def_ return true; } -lUInt32 LVWin32Font::getTextWidth( const lChar16 * text, int len ) +lUInt32 LVWin32Font::getTextWidth( const lChar16 * text, int len, TextLangCfg * lang_cfg=NULL ) { // static lUInt16 widths[MAX_LINE_CHARS+1]; @@ -5959,7 +5981,8 @@ lUInt32 LVWin32Font::getTextWidth( const lChar16 * text, int len ) widths, flags, MAX_LINE_WIDTH, - L' ' // def_char + L' ', // def_char + lang_cfg ); if ( res>0 && resgetHyphMethod()->hyphenate(text+hwStart, hwEnd-hwStart, widths+hwStart, flags+hwStart, hyphwidth, max_width); + else // Use global lang hyph method + HyphMan::hyphenate(text+hwStart, hwEnd-hwStart, widths+hwStart, flags+hwStart, hyphwidth, max_width); return nchars; } diff --git a/crengine/src/lvrend.cpp b/crengine/src/lvrend.cpp index e1baf872d..38c4c3994 100755 --- a/crengine/src/lvrend.cpp +++ b/crengine/src/lvrend.cpp @@ -1369,6 +1369,7 @@ class 
CCRTable { fmt.setInnerWidth( w - padding_left - padding_right ); RENDER_RECT_SET_FLAG(fmt, INNER_FIELDS_SET); RENDER_RECT_SET_DIRECTION(fmt, caption_direction); + fmt.setLangNodeIndex( TextLangMan::getLangNodeIndex(caption) ); } fmt.push(); caption_h = caption->renderFinalBlock( txform, &fmt, w - padding_left - padding_right ); @@ -1479,6 +1480,7 @@ class CCRTable { fmt.setInnerWidth( cell->width - padding_left - padding_right ); RENDER_RECT_SET_FLAG(fmt, INNER_FIELDS_SET); RENDER_RECT_SET_DIRECTION(fmt, cell->direction); + fmt.setLangNodeIndex( TextLangMan::getLangNodeIndex(cell->elem) ); } fmt.push(); int h = cell->elem->renderFinalBlock( txform, &fmt, cell->width - padding_left - padding_right); @@ -2368,7 +2370,8 @@ lString16 renderListItemMarker( ldomNode * enode, int & marker_width, LFormatted // (the "xviii" marker will be in its own LTR segment, and the followup text // in another LTR segment) if ( txform ) { - txform->AddSourceLine( marker.c_str(), marker.length(), cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, 0, 0); + TextLangCfg * lang_cfg = TextLangMan::getTextLangCfg( enode ); + txform->AddSourceLine( marker.c_str(), marker.length(), cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, 0, 0); } } return marker; @@ -2419,13 +2422,19 @@ bool renderAsListStylePositionInside( const css_style_rec_t * style, bool is_rtl // as is to the inline children elements: it is only used to get the width of // the container, which is only needed to compute indent (text-indent) values in %, // and to get paragraph direction (LTR/RTL/UNSET). 
-void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAccessor * fmt, int & baseflags, int indent, int line_h, int valign_dy, bool * is_link_start ) +void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAccessor * fmt, int & baseflags, int indent, int line_h, TextLangCfg * lang_cfg, int valign_dy, bool * is_link_start ) { if ( enode->isElement() ) { lvdom_element_render_method rm = enode->getRendMethod(); if ( rm == erm_invisible ) return; // don't draw invisible + if ( enode->hasAttribute( attr_lang ) ) { + lString16 lang_tag = enode->getAttributeValue( attr_lang ); + if ( !lang_tag.empty() ) + lang_cfg = TextLangMan::getTextLangCfg( lang_tag ); + } + if ( enode->isFloatingBox() && rm != erm_final ) { // (A floating floatBox can't be erm_final: it is always erm_block, // but let's just be sure of that.) @@ -2436,7 +2445,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce // be guessed and renderBlockElement() called to render it // and get is height, so LFormattedText knows how to render // this erm_final text around it. 
- txform->AddSourceObject(baseflags|LTEXT_SRC_IS_FLOAT, line_h, valign_dy, indent, enode ); + txform->AddSourceObject(baseflags|LTEXT_SRC_IS_FLOAT, line_h, valign_dy, indent, enode, lang_cfg ); baseflags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag return; } @@ -2825,7 +2834,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce if ( sp==css_lsp_outside ) margin = -marker_width; // will ensure negative/hanging indent-like rendering marker += "\t"; - txform->AddSourceLine( marker.c_str(), marker.length(), cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, + txform->AddSourceLine( marker.c_str(), marker.length(), cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, margin, NULL ); flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; } @@ -2884,27 +2893,27 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce lString16Collection lines; lines.parse(title, cs16("\\n"), true); for ( int i=0; iAddSourceLine( lines[i].c_str(), lines[i].length(), cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, 0, NULL ); + txform->AddSourceLine( lines[i].c_str(), lines[i].length(), cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, 0, NULL ); } - txform->AddSourceObject(flags, line_h, valign_dy, indent, enode ); + txform->AddSourceObject(flags, line_h, valign_dy, indent, enode, lang_cfg ); title = enode->getAttributeValue(attr_subtitle); if ( !title.empty() ) { lString16Collection lines; lines.parse(title, cs16("\\n"), true); for ( int i=0; iAddSourceLine( lines[i].c_str(), lines[i].length(), cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, 0, NULL ); + txform->AddSourceLine( lines[i].c_str(), lines[i].length(), cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, 0, NULL ); } title = enode->getAttributeValue(attr_title); if ( !title.empty() ) { lString16Collection lines; lines.parse(title, cs16("\\n"), true); 
for ( int i=0; iAddSourceLine( lines[i].c_str(), lines[i].length(), cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, 0, NULL ); + txform->AddSourceLine( lines[i].c_str(), lines[i].length(), cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, 0, NULL ); } } else { // inline image // We use the flags computed previously (and not baseflags) as they // carry vertical alignment - txform->AddSourceObject(flags, line_h, valign_dy, indent, enode ); + txform->AddSourceObject(flags, line_h, valign_dy, indent, enode, lang_cfg ); flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } } @@ -2957,7 +2966,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce } // We use the flags computed previously (and not baseflags) as they // carry vertical alignment - txform->AddSourceObject(flags|LTEXT_SRC_IS_INLINE_BOX, line_h, valign_dy, indent, enode ); + txform->AddSourceObject(flags|LTEXT_SRC_IS_INLINE_BOX, line_h, valign_dy, indent, enode, lang_cfg ); if ( is_embedded_block ) { // Let flags unchanged, with their newline/alignment flag as if it // hadn't been consumed, so it is reported back into baseflags below @@ -2983,7 +2992,8 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce // Don't handle dir= for the erm_final (

hasAttribute( attr_dir ) && rm != erm_final; + bool hasDirAttribute = enode->hasAttribute( attr_dir ) && rm != erm_final + && rm != erm_table_caption && rm != erm_list_item; bool addGeneratedContent = hasDirAttribute || nodeElementId == el_bdi || nodeElementId == el_bdo || @@ -3009,7 +3019,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce // But if we use another char (0x00AB / 0x00BB), it gets mirrored correctly. // Might be that HarfBuzz first substitute it with arabic quotes (which happen // to look inverted), and then mirror that? - txform->AddSourceLine( L"\x201C", 1, cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x201C", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } // The following is needed for fribidi to do the right thing when the content creator @@ -3032,16 +3042,16 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce // leaving => PDF PDI // but it then doesn't have the intended effect (fribidi bug or limitation?) 
if ( dir.compare("rtl") == 0 ) { - // txform->AddSourceLine( L"\x2068\x202E", 1, cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + // txform->AddSourceLine( L"\x2068\x202E", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); // closeWithPDFPDI = true; - txform->AddSourceLine( L"\x202E", 1, cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x202E", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); closeWithPDF = true; flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } else if ( dir.compare("ltr") == 0 ) { - // txform->AddSourceLine( L"\x2068\x202D", 1, cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + // txform->AddSourceLine( L"\x2068\x202D", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); // closeWithPDFPDI = true; - txform->AddSourceLine( L"\x202D", 1, cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x202D", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); closeWithPDF = true; flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } @@ -3054,17 +3064,17 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce // dir=auto => FSI U+2068 FIRST STRONG ISOLATE // leaving => PDI U+2069 POP DIRECTIONAL ISOLATE if ( dir.compare("rtl") == 0 ) { - txform->AddSourceLine( L"\x2067", 1, cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x2067", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); closeWithPDI = true; flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } else if ( dir.compare("ltr") == 0 ) { - txform->AddSourceLine( L"\x2066", 1, cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x2066", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, 
valign_dy); closeWithPDI = true; flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } else if ( nodeElementId == el_bdi || dir.compare("auto") == 0 ) { - txform->AddSourceLine( L"\x2068", 1, cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x2068", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); closeWithPDI = true; flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } @@ -3097,7 +3107,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce for (int i=0; igetChildNode( i ); - renderFinalBlock( child, txform, fmt, flags, indent, line_h, valign_dy, is_link_start_p ); + renderFinalBlock( child, txform, fmt, flags, indent, line_h, lang_cfg, valign_dy, is_link_start_p ); } if ( addGeneratedContent ) { @@ -3106,20 +3116,20 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce lUInt32 bgcl = style->background_color.type!=css_val_color ? 
0xFFFFFFFF : style->background_color.value; if ( nodeElementId == el_q ) { // Add default quoting closing char - txform->AddSourceLine( L"\x201D", 1, cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x201D", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } // See comment above: these are the closing counterpart if ( closeWithPDI ) { - txform->AddSourceLine( L"\x2069", 1, cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x2069", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } else if ( closeWithPDFPDI ) { - txform->AddSourceLine( L"\x202C\x2069", 1, cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x202C\x2069", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } else if ( closeWithPDF ) { - txform->AddSourceLine( L"\x202C", 1, cl, bgcl, font, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L"\x202C", 1, cl, bgcl, font, lang_cfg, flags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); flags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag } } @@ -3134,7 +3144,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce lUInt32 cl = style->color.type!=css_val_color ? 0xFFFFFFFF : style->color.value; lUInt32 bgcl = style->background_color.type!=css_val_color ? 
0xFFFFFFFF : style->background_color.value; lChar16 delimiter[] = {UNICODE_NO_BREAK_SPACE, UNICODE_NO_BREAK_SPACE}; //160 - txform->AddSourceLine( delimiter, sizeof(delimiter)/sizeof(lChar16), cl, bgcl, font, LTEXT_FLAG_OWNTEXT | LTEXT_RUNIN_FLAG, line_h, valign_dy, 0, NULL ); + txform->AddSourceLine( delimiter, sizeof(delimiter)/sizeof(lChar16), cl, bgcl, font, lang_cfg, LTEXT_FLAG_OWNTEXT | LTEXT_RUNIN_FLAG, line_h, valign_dy, 0, NULL ); flags &= ~LTEXT_RUNIN_FLAG; } } @@ -3171,7 +3181,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce LVFont * font = enode->getFont().get(); lUInt32 cl = style->color.type!=css_val_color ? 0xFFFFFFFF : style->color.value; lUInt32 bgcl = style->background_color.type!=css_val_color ? 0xFFFFFFFF : style->background_color.value; - txform->AddSourceLine( L" ", 1, cl, bgcl, font, baseflags | LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L" ", 1, cl, bgcl, font, lang_cfg, baseflags | LTEXT_FLAG_OWNTEXT, line_h, valign_dy); // baseflags &= ~LTEXT_FLAG_NEWLINE; // clear newline flag // No need to clear the flag, as we set it just below // (any LTEXT_ALIGN_* set implies LTEXT_FLAG_NEWLINE) @@ -3230,7 +3240,7 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce LVFont * font = enode->getFont().get(); lUInt32 cl = style->color.type!=css_val_color ? 0xFFFFFFFF : style->color.value; lUInt32 bgcl = style->background_color.type!=css_val_color ? 
0xFFFFFFFF : style->background_color.value; - txform->AddSourceLine( L" ", 1, cl, bgcl, font, baseflags|LTEXT_SRC_IS_CLEAR_LAST|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); + txform->AddSourceLine( L" ", 1, cl, bgcl, font, lang_cfg, baseflags|LTEXT_SRC_IS_CLEAR_LAST|LTEXT_FLAG_OWNTEXT, line_h, valign_dy); } } else if ( enode->isText() ) { @@ -3325,9 +3335,13 @@ void renderFinalBlock( ldomNode * enode, LFormattedText * txform, RenderRectAcce } */ if ( txt.length()>0 ) { - txform->AddSourceLine( txt.c_str(), txt.length(), cl, bgcl, font, baseflags | tflags, + txform->AddSourceLine( txt.c_str(), txt.length(), cl, bgcl, font, lang_cfg, baseflags | tflags, line_h, valign_dy, indent, enode, 0, letter_spacing ); baseflags &= ~LTEXT_FLAG_NEWLINE & ~LTEXT_SRC_IS_CLEAR_BOTH; // clear newline flag + // To show the lang tag for the lang used for this text node AFTER it: + // lString16 lang_tag_txt = L"[" + (lang_cfg ? lang_cfg->getLangTag() : lString16("??")) + L"]"; + // txform->AddSourceLine( lang_tag_txt.c_str(), lang_tag_txt.length(), cl, bgcl, font, + // lang_cfg, baseflags|tflags|LTEXT_FLAG_OWNTEXT, line_h, valign_dy, 0, NULL ); } } } @@ -4095,6 +4109,7 @@ int renderBlockElementLegacy( LVRendPageContext & context, ldomNode * enode, int fmt.setWidth( width ); fmt.setX( fmt.getX() ); fmt.setY( fmt.getY() ); + fmt.setLangNodeIndex( 0 ); // No support for lang in legacy rendering fmt.push(); //if ( CRLog::isTraceEnabled() ) // CRLog::trace("rendering final node: %s %d %s", LCSTR(enode->getNodeName()), enode->getDataIndex(), LCSTR(ldomXPointer(enode,0).toString()) ); @@ -4323,16 +4338,16 @@ class FlowState { // an inner block (so, making a sub-level). 
class BlockShift { public: int direction; - ldomNode * lang_node; + lInt32 lang_node_idx; int x_min; int x_max; int l_y; int in_y_min; int in_y_max; bool avoid_pb_inside; - void reset(int dir, ldomNode * langnode, int xmin, int xmax, int ly, int iymin, int iymax, bool avoidpbinside) { + void reset(int dir, lInt32 langNodeIdx, int xmin, int xmax, int ly, int iymin, int iymax, bool avoidpbinside) { direction = dir; - lang_node = langnode; + lang_node_idx = langNodeIdx; x_min = xmin; x_max = xmax; l_y = ly; @@ -4340,9 +4355,9 @@ class FlowState { in_y_max = iymax; avoid_pb_inside = avoidpbinside; } - BlockShift(int dir, ldomNode * langnode, int xmin, int xmax, int ly, int iymin, int iymax, bool avoidpbinside) : + BlockShift(int dir, lInt32 langNodeIdx, int xmin, int xmax, int ly, int iymin, int iymax, bool avoidpbinside) : direction(dir), - lang_node(langnode), + lang_node_idx(langNodeIdx), x_min(xmin), x_max(xmax), l_y(ly), @@ -4367,12 +4382,10 @@ class FlowState { { } }; int direction; // flow inline direction (LTR/RTL) - ldomNode * lang_node; // nearest upper node with a lang="" attribute (NULL if none) + lInt32 lang_node_idx; // dataIndex of nearest upper node with a lang="" attribute (0 if none) // We don't need to know its value in here, the idx of this node // will be saved in the final block RenderRectAccessor so it can // be fetched from the node when needed, when laying out text). 
- // todo: currently not used, should be saved in RenderRectAccessor - // and used by lvtextfm.cpp for typography LVRendPageContext & context; LVPtrVector _shifts; LVPtrVector _floats; @@ -4409,9 +4422,9 @@ class FlowState { int vm_back_usable_as_margin; // previously moved vertical space where next margin could be accounted in public: - FlowState( LVRendPageContext & ctx, int width, int rendflags, int dir=REND_DIRECTION_UNSET, ldomNode * langnode=NULL ): + FlowState( LVRendPageContext & ctx, int width, int rendflags, int dir=REND_DIRECTION_UNSET, lInt32 langNodeIdx=0 ): direction(dir), - lang_node(langnode), + lang_node_idx(langNodeIdx), context(ctx), rend_flags(rendflags), level(0), @@ -4466,11 +4479,14 @@ class FlowState { } } + bool isMainFlow() { + return is_main_flow; + } int getDirection() { return direction; } - bool isMainFlow() { - return is_main_flow; + lInt32 getLangNodeIndex() { + return lang_node_idx; } int getOriginalContainerWidth() { return o_width; @@ -5224,18 +5240,18 @@ class FlowState { // Enter/leave a block level: backup/restore some of this FlowState // fields, and do some housekeeping. 
- void newBlockLevel( int width, int d_left, bool avoid_pb, int dir, ldomNode * langnode ) { + void newBlockLevel( int width, int d_left, bool avoid_pb, int dir, lInt32 langNodeIdx ) { // Don't new/delete to avoid too many malloc/free, keep and re-use/reset // the ones already created if ( _shifts.length() <= level ) { - _shifts.push( new BlockShift( direction, lang_node, x_min, x_max, l_y, in_y_min, in_y_max, avoid_pb_inside ) ); + _shifts.push( new BlockShift( direction, lang_node_idx, x_min, x_max, l_y, in_y_min, in_y_max, avoid_pb_inside ) ); } else { - _shifts[level]->reset( direction, lang_node, x_min, x_max, l_y, in_y_min, in_y_max, avoid_pb_inside ); + _shifts[level]->reset( direction, lang_node_idx, x_min, x_max, l_y, in_y_min, in_y_max, avoid_pb_inside ); } direction = dir; - if (langnode != NULL) - lang_node = langnode; + if (langNodeIdx != -1) + lang_node_idx = langNodeIdx; x_min += d_left; x_max = x_min + width; l_y = c_y; @@ -5256,7 +5272,7 @@ class FlowState { bottom_overflow = in_y_max > last_c_y ? in_y_max - last_c_y : 0; // positive value BlockShift * prev = _shifts[level-1]; direction = prev->direction; - lang_node = prev->lang_node; + lang_node_idx = prev->lang_node_idx; x_min = prev->x_min; x_max = prev->x_max; l_y = prev->l_y; @@ -5914,6 +5930,7 @@ void renderBlockElementEnhanced( FlowState * flow, ldomNode * enode, int x, int return; css_style_rec_t * style = enode->getStyle().get(); + lUInt16 nodeElementId = enode->getNodeId(); // See if dir= attribute or CSS specified direction int direction = flow->getDirection(); @@ -5945,7 +5962,7 @@ void renderBlockElementEnhanced( FlowState * flow, ldomNode * enode, int x, int // See if lang= attribute bool has_lang_attribute = false; - if ( enode->hasAttribute( attr_lang ) ) { + if ( enode->hasAttribute( attr_lang ) && !enode->getAttributeValue( attr_lang ).empty() ) { // We'll probably have to check it is a valid lang specification // before overriding the upper one. 
// lString16 lang = enode->getAttributeValue( attr_lang ); @@ -6051,9 +6068,11 @@ void renderBlockElementEnhanced( FlowState * flow, ldomNode * enode, int x, int // Adjust box size and position //


gets its style width, height and margin:auto no matter flags - bool is_hr = enode->getNodeId() == el_hr; + bool is_hr = nodeElementId == el_hr; + //
seen as block elements (when they are "display:block" and ended up + // not part of a final node) will get some height if none specified // block element with height added for empty lines in txt document - bool is_empty_line_elem = enode->getNodeId() == el_empty_line; + bool is_br_or_empty_line_elem = (nodeElementId == el_br) || (nodeElementId == el_empty_line); // Get any style height to be ensured below (just before we add bottom // padding when erm_block or erm_final) @@ -6066,12 +6085,34 @@ void renderBlockElementEnhanced( FlowState * flow, ldomNode * enode, int x, int // Nothing special to do: the child style height will be // enforced by subcall to renderBlockElement(child) } - else if ( is_hr || is_empty_line_elem || BLOCK_RENDERING(flags, ENSURE_STYLE_HEIGHT) ) { + else if ( is_hr || is_br_or_empty_line_elem || BLOCK_RENDERING(flags, ENSURE_STYLE_HEIGHT) ) { // We always use the style height for
, to actually have // a height to fill with its color style_height = style->height; style_height_base_em = em; apply_style_height = true; + if ( is_br_or_empty_line_elem && style_height.type == css_val_unspecified ) { + // No height specified: default to line-height, just like + // if it were rendered final. + int line_h; + if ( style->line_height.type == css_val_unspecified && + style->line_height.value == css_generic_normal ) { + line_h = enode->getFont()->getHeight(); // line-height: normal + } + else { + // In all other cases (%, em, unitless/unspecified), we can just + // scale 'em', and use the computed value for absolute sized + // values and 'rem' (related to root element font size). + line_h = lengthToPx(style->line_height, em, em, true); + } + // Scale line_h according to gInterlineScaleFactor, but not if + // it was already in screen_px, which means it has already been + // scaled (in setNodeStyle() when inherited). + if (style->line_height.type != css_val_screen_px && gInterlineScaleFactor != INTERLINE_SCALE_FACTOR_NO_SCALE) + line_h = (line_h * gInterlineScaleFactor) >> INTERLINE_SCALE_FACTOR_SHIFT; + style_height.value = line_h; + style_height.type = css_val_screen_px; + } } if ( apply_style_height && style_height.type != css_val_unspecified ) { if ( !BLOCK_RENDERING(flags, ALLOW_STYLE_W_H_ABSOLUTE_UNITS) && @@ -6079,7 +6120,7 @@ void renderBlockElementEnhanced( FlowState * flow, ldomNode * enode, int x, int style_height.type != css_val_ex && style_height.type != css_val_rem ) { apply_style_height = false; } - if ( is_hr || is_empty_line_elem || apply_style_height ) { + if ( is_hr || is_br_or_empty_line_elem || apply_style_height ) { style_h = lengthToPx( style_height, container_width, style_height_base_em ); if ( BLOCK_RENDERING(flags, USE_W3C_BOX_MODEL) ) { // If W3C box model requested, CSS height specifies the height @@ -6463,7 +6504,14 @@ void renderBlockElementEnhanced( FlowState * flow, ldomNode * enode, int x, int // Set direction for all 
blocks (needed for text in erm_final, but also for list item // markers in erm_block, so that DrawDocument can draw it on the right if rtl). RENDER_RECT_SET_DIRECTION(fmt, direction); - // todo: also set/store lang_node when we'll start implementing it + // Store lang node index if it's an erm_final like node (it's only needed for these, + // as the starting lang for renderFinalBlock()) + if ( m == erm_final || m == erm_table_caption || m == erm_list_item ) { + if ( has_lang_attribute ) + fmt.setLangNodeIndex( enode->getDataIndex() ); + else + fmt.setLangNodeIndex( flow->getLangNodeIndex() ); + } fmt.setX( x ); fmt.setY( flow->getCurrentRelativeY() ); fmt.setWidth( width ); @@ -6590,7 +6638,7 @@ void renderBlockElementEnhanced( FlowState * flow, ldomNode * enode, int x, int case erm_block: case erm_inline: // For inlineBox elements only { - if (m == erm_inline && enode->getNodeId() != el_inlineBox) { + if (m == erm_inline && nodeElementId != el_inlineBox) { printf("CRE WARNING: node discarded (unexpected erm_inline for elem %s)\n", UnicodeToLocal(ldomXPointer(enode, 0).toString()).c_str()); // (add %s and enode->getText8().c_str() to see text content) @@ -6658,7 +6706,7 @@ void renderBlockElementEnhanced( FlowState * flow, ldomNode * enode, int x, int margin_left + (is_rtl ? 0 : list_marker_padding) + padding_left, // d_left break_inside==RN_SPLIT_AVOID, direction, - has_lang_attribute ? enode : NULL); + has_lang_attribute ? enode->getDataIndex() : -1); if (padding_top>0) { // This may push accumulated vertical margin @@ -6783,7 +6831,10 @@ void renderBlockElementEnhanced( FlowState * flow, ldomNode * enode, int x, int pad_h = flow->getPageHeight(); // Add this space to the page splitting context // Allow page splitting inside this useless excessive style height - flow->addContentSpace(pad_h, 1, false, false, false); + // (Unless it's a
or that we'd rather keep it + // all on a page (to avoid text line shifts and ghosting in interline) + bool split_avoid_inside = is_br_or_empty_line_elem; + flow->addContentSpace(pad_h, 1, false, split_avoid_inside, false); } } @@ -7227,7 +7278,7 @@ int renderBlockElement( LVRendPageContext & context, ldomNode * enode, int x, in // (We are called when rendering the root node, and when rendering each float // met along walking the root node hierarchy - and when meeting a new float // in a float, etc...) - FlowState flow( context, width, rend_flags, direction ); + FlowState flow( context, width, rend_flags, direction, TextLangMan::getLangNodeIndex(enode) ); if (baseline != NULL) { flow.setRequestedBaselineType(*baseline); } @@ -8351,6 +8402,8 @@ void setNodeStyle( ldomNode * enode, css_style_ref_t parent_style, LVFontRef par css_style_ref_t style( new css_style_rec_t ); css_style_rec_t * pstyle = style.get(); + lUInt16 nodeElementId = enode->getNodeId(); + if (gDOMVersionRequested < 20180524) { // The display property initial value has been changed from css_d_inherit // to css_d_inline (as per spec, and so that an unknown element does not @@ -8372,26 +8425,26 @@ void setNodeStyle( ldomNode * enode, css_style_ref_t parent_style, LVFontRef par // Account for backward incompatible changes in fb2def.h if (gDOMVersionRequested < 20180528) { // revert what was changed 20180528 - if (enode->getNodeId() == el_form) { + if (nodeElementId == el_form) { pstyle->display = css_d_none; // otherwise shown as block, as it may have textual content } - if (enode->getNodeId() == el_code) { + if (nodeElementId == el_code) { pstyle->white_space = css_ws_pre; // otherwise white-space: normal, as browsers do } - if (enode->getNodeId() >= el_address && enode->getNodeId() <= el_xmp) { // newly added block elements + if (nodeElementId >= el_address && nodeElementId <= el_xmp) { // newly added block elements pstyle->display = css_d_inline; // previously unknown and shown as inline if 
(gDOMVersionRequested < 20180524) { pstyle->display = css_d_inherit; // previously unknown and display: inherit } } if (gDOMVersionRequested < 20180524) { // revert what was fixed 20180524 - if (enode->getNodeId() == el_cite) { + if (nodeElementId == el_cite) { pstyle->display = css_d_block; // otherwise correctly set to css_d_inline } - if (enode->getNodeId() == el_li) { + if (nodeElementId == el_li) { pstyle->display = css_d_list_item; // otherwise correctly set to css_d_list_item_block } - if (enode->getNodeId() == el_style) { + if (nodeElementId == el_style) { pstyle->display = css_d_inline; // otherwise correctly set to css_d_none (hidden) } } @@ -8405,7 +8458,7 @@ void setNodeStyle( ldomNode * enode, css_style_ref_t parent_style, LVFontRef par // css_ta_left (as Firefox), but it's best in our context to use the // value set to the (or current DocFragment's) BODY node, which starts // with css_ta_left but may be set to css_ta_justify by our epub.css. - if (enode->getNodeId() == el_table) { + if (nodeElementId == el_table) { // To do as Firefox: // pstyle->text_align = css_ta_left; // But we'd rather use the BODY value: @@ -8418,7 +8471,7 @@ void setNodeStyle( ldomNode * enode, css_style_ref_t parent_style, LVFontRef par } if (enode->getNodeNsId() == ns_epub) { - if (enode->getNodeId() == el_case) { // + if (nodeElementId == el_case) { // // As we don't support any specific namespace (like MathML, SVG...), just // hide content - it must be followed by a // section with usually regular content like some image. 
@@ -8512,7 +8565,7 @@ void setNodeStyle( ldomNode * enode, css_style_ref_t parent_style, LVFontRef par // Ensure any element (that crengine "added BODY>stylesheet child // element with HEAD>STYLE&LINKS content") stays invisible (it could end up being // made visible when some book stylesheet contains "body > * {display: block;}") - if (enode->getNodeId() == el_stylesheet) { + if (nodeElementId == el_stylesheet) { pstyle->display = css_d_none; } @@ -8538,7 +8591,7 @@ void setNodeStyle( ldomNode * enode, css_style_ref_t parent_style, LVFontRef par } } if ( BLOCK_RENDERING_G(WRAP_FLOATS) ) { - if ( enode->getNodeId() == el_floatBox ) { + if ( nodeElementId == el_floatBox ) { // floatBox added, by initNodeRendMethod(), as a wrapper around // element with float:. // We want to set the floatBox->style->float_ to the same value @@ -8585,7 +8638,7 @@ void setNodeStyle( ldomNode * enode, css_style_ref_t parent_style, LVFontRef par if ( BLOCK_RENDERING_G(BOX_INLINE_BLOCKS) ) { // See above, same reasoning - if ( enode->getNodeId() == el_inlineBox ) { + if ( nodeElementId == el_inlineBox ) { // el_inlineBox are "display: inline" by default (defined in fb2def.h) if (enode->getChildCount() == 1) { ldomNode * child = enode->getChildNode(0); @@ -8919,11 +8972,12 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct bool isStartNode = true; // we are starting measurement on that node // Start measurements and recursions: getRenderedWidths(node, maxWidth, minWidth, direction, ignoreMargin, rendFlags, - curMaxWidth, curWordWidth, collapseNextSpace, lastSpaceWidth, indent, isStartNode); + curMaxWidth, curWordWidth, collapseNextSpace, lastSpaceWidth, indent, NULL, isStartNode); } void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direction, bool ignoreMargin, int rendFlags, - int &curMaxWidth, int &curWordWidth, bool &collapseNextSpace, int &lastSpaceWidth, int indent, bool isStartNode) + int &curMaxWidth, int &curWordWidth, bool 
&collapseNextSpace, int &lastSpaceWidth, + int indent, TextLangCfg * lang_cfg, bool isStartNode) { // This does mostly what renderBlockElement, renderFinalBlock and lvtextfm.cpp // do, but only with widths and horizontal margin/border/padding and indent @@ -8940,6 +8994,15 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct if (m == erm_invisible) return; + if ( isStartNode ) { + lang_cfg = TextLangMan::getTextLangCfg( node ); // Fetch it from node or its parents + } + else if ( node->hasAttribute( attr_lang ) ) { + lString16 lang_tag = node->getAttributeValue( attr_lang ); + if ( !lang_tag.empty() ) + lang_cfg = TextLangMan::getTextLangCfg( lang_tag ); + } + if ( isStartNode && node->isBoxingInlineBox() ) { // The inlineBox is erm_inline, and we'll be measuring it below // as part of measuring other erm_inline in some erm_final. @@ -9070,7 +9133,7 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct // Nothing more to do with inline elements: they just carry some // styles that will be grabbed by children text nodes getRenderedWidths(child, maxWidth, minWidth, direction, false, rendFlags, - curMaxWidth, curWordWidth, collapseNextSpace, lastSpaceWidth, indent); + curMaxWidth, curWordWidth, collapseNextSpace, lastSpaceWidth, indent, lang_cfg); } return; } @@ -9186,7 +9249,7 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct for (int i = 0; i < node->getChildCount(); i++) { ldomNode * child = node->getChildNode(i); getRenderedWidths(child, _maxWidth, _minWidth, direction, false, rendFlags, - curMaxWidth, curWordWidth, collapseNextSpace, lastSpaceWidth, indent); + curMaxWidth, curWordWidth, collapseNextSpace, lastSpaceWidth, indent, lang_cfg); // A
can happen deep among our children, so we deal with that when erm_inline above } if (lastSpaceWidth) @@ -9215,7 +9278,7 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct int _minw = 0; ldomNode * child = node->getChildNode(i); getRenderedWidths(child, _maxw, _minw, direction, false, rendFlags, - curMaxWidth, curWordWidth, collapseNextSpace, lastSpaceWidth, indent); + curMaxWidth, curWordWidth, collapseNextSpace, lastSpaceWidth, indent, lang_cfg); if (m == erm_table_row) { // For table rows, adding the min/max widths of each children // (the table cells), instead of taking the largest, gives @@ -9370,6 +9433,18 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct // getAdditionalCharWidthOnLeft(). // todo: use fribidi and split measurement at fribidi level change, // and beware left/right side bearing adjustments... + #if (USE_LIBUNIBREAK==1) + // If using libunibreak, we do similarly as in lvtextfm.cpp copyText(), + // except that we don't update previous char, but look ahead at next + // char to know about current break. + // Also, as we do all that only text node by text node, we may lose + // line breaking rules between contiguous text nodes (but it's a bit + // complicated to pass this lbCtx across calls...) + struct LineBreakContext lbCtx; + lb_init_break_context(&lbCtx, 0x0020, NULL); + lbCtx.lbpLang = lang_cfg->getLBProps(); + lb_process_next_char(&lbCtx, (utf32_t)(*txt)); + #endif while (true) { LVFont * font = node->getParentNode()->getFont().get(); int chars_measured = font->measureText( @@ -9378,9 +9453,82 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct widths, flags, 0x7FFF, // very wide width '?', // replacement char + lang_cfg, letter_spacing, false); // no hyphenation // todo: provide direction and hints + #if (USE_LIBUNIBREAK==1) + for (int i=0; i0 ? 
widths[i-1] : 0); + lChar16 c = *(txt + start + i); + lChar16 next_c = *(txt + start + i + 1); // might be 0 at end of string + if ( lang_cfg->hasLBCharSubFunc() ) { + next_c = lang_cfg->getLBCharSubFunc()(txt+start, i+1, len-1 - (i+1)); + } + int brk = lb_process_next_char(&lbCtx, (utf32_t)next_c); + // We don't need to bother with collapsing consecutive spaces, as + // we're dealing with a single text node, and the HTML parser has + // removed multiple consecutive spaces (except with PRE, that we + // already did not handle correctly when !USE_LIBUNIBREAK). + // printf("between <%c%c>: brk %d\n", c, next_c, brk); + if (brk == LINEBREAK_ALLOWBREAK) { + if (flags[i] & LCHAR_IS_SPACE) { // A space + if (collapseNextSpace) // ignore this space + continue; + collapseNextSpace = true; // ignore next spaces, even if in another node + lastSpaceWidth = w; + curMaxWidth += w; // add this space to non-wrap width + if (curWordWidth > 0) { // there was a word before this space + if (start+i > 0) { + // adjust for last word's last char overflow (italic, letter f...) 
+ lChar16 prevc = *(txt + start + i - 1); + int right_overflow = - font->getRightSideBearing(prevc, true, true); + curWordWidth += right_overflow; + } + } + if (curWordWidth > minWidth) // done with previous word + minWidth = curWordWidth; // longest word found + curWordWidth = 0; + } + else { // break after a non space: might be a CJK char (or other stuff) + collapseNextSpace = false; // next space should not be ignored + lastSpaceWidth = 0; // no width to take off if we stop with this char + curMaxWidth += w; + if (curWordWidth > 0) { // there was a word or CJK char before this CJK char + if (start+i > 0) { + // adjust for last word's last char or previous CJK char right overflow + lChar16 prevc = *(txt + start + i - 1); + int right_overflow = - font->getRightSideBearing(prevc, true, true); + curWordWidth += right_overflow; + } + } + if (curWordWidth > minWidth) // done with previous word + minWidth = curWordWidth; // longest word found + curWordWidth = w; + // adjust for leading overflow + int left_overflow = - font->getLeftSideBearing(c, false, true); + curWordWidth += left_overflow; + if (start + i == 0) // at start of text only? (not sure) + curMaxWidth += left_overflow; // also add it to max width + } + } + else { // break not allowed: this char is part of a word + collapseNextSpace = false; // next space should not be ignored + lastSpaceWidth = 0; // no width to take off if we stop with this char + if (curWordWidth == 0) { // first char of a word + // adjust for leading overflow on first char of a word + int left_overflow = - font->getLeftSideBearing(c, false, true); + curWordWidth += left_overflow; + if (start + i == 0) // at start of text only? (not sure) + curMaxWidth += left_overflow; // also add it to max width + } + curMaxWidth += w; + curWordWidth += w; + // libunibreak should handle properly '/' in urls (except may be + // if the url parts are made of numbers...) + } + } + #else // not USE_LIBUNIBREAK==1 for (int i=0; i0 ? 
widths[i-1] : 0); lChar16 c = *(txt + start + i); @@ -9456,6 +9604,7 @@ void getRenderedWidths(ldomNode * node, int &maxWidth, int &minWidth, int direct } } } + #endif // not USE_LIBUNIBREAK==1 if ( chars_measured == len ) { // done with this text node if (curWordWidth > 0) { // we end with a word if (start+len > 0) { diff --git a/crengine/src/lvtextfm.cpp b/crengine/src/lvtextfm.cpp index 9139ca450..45e364250 100755 --- a/crengine/src/lvtextfm.cpp +++ b/crengine/src/lvtextfm.cpp @@ -24,6 +24,7 @@ #include "../include/lvimg.h" #include "../include/lvtinydom.h" #include "../include/lvrend.h" +#include "../include/textlang.h" #endif #if USE_HARFBUZZ==1 @@ -198,6 +199,7 @@ void lvtextFreeFormatter( formatted_text_fragment_t * pbuffer ) void lvtextAddSourceLine( formatted_text_fragment_t * pbuffer, lvfont_handle font, /* handle of font to draw string */ + TextLangCfg * lang_cfg, const lChar16 * text, /* pointer to unicode text string */ lUInt32 len, /* number of chars in text, 0 for auto(strlen) */ lUInt32 color, /* color */ @@ -227,6 +229,9 @@ void lvtextAddSourceLine( formatted_text_fragment_t * pbuffer, // if (font == NULL && ((flags & LTEXT_WORD_IS_OBJECT) == 0)) { // CRLog::fatal("No font specified for text"); // } + if ( !lang_cfg ) + lang_cfg = TextLangMan::getTextLangCfg(); // use main_lang + pline->lang_cfg = lang_cfg; if (!len) for (len=0; text[len]; len++) ; if (flags & LTEXT_FLAG_OWNTEXT) { @@ -260,6 +265,7 @@ void lvtextAddSourceObject( lInt16 valign_dy, /* drift y from baseline */ lInt16 indent, /* first line indent (or all but first, when negative) */ void * object, /* pointer to custom object */ + TextLangCfg * lang_cfg, lInt16 letter_spacing ) { @@ -279,6 +285,9 @@ void lvtextAddSourceObject( pline->interval = interval; pline->valign_dy = valign_dy; pline->letter_spacing = letter_spacing; + if ( !lang_cfg ) + lang_cfg = TextLangMan::getTextLangCfg(); // use main_lang + pline->lang_cfg = lang_cfg; } @@ -298,6 +307,7 @@ void 
LFormattedText::AddSourceObject( lInt16 valign_dy, /* drift y from baseline */ lInt16 indent, /* first line indent (or all but first, when negative) */ void * object, /* pointer to custom object */ + TextLangCfg * lang_cfg, lInt16 letter_spacing ) { @@ -310,7 +320,7 @@ void LFormattedText::AddSourceObject( if (flags & LTEXT_SRC_IS_FLOAT) { // not an image but a float:'ing node // Nothing much to do with it at this point lvtextAddSourceObject(m_pbuffer, 0, 0, - flags, interval, valign_dy, indent, object, letter_spacing ); + flags, interval, valign_dy, indent, object, lang_cfg, letter_spacing ); // lvtextAddSourceObject will itself add to flags: | LTEXT_SRC_IS_OBJECT // (only flags & object parameter will be used, the others are not, // but they matter if this float is the first node in a paragraph, @@ -322,7 +332,7 @@ void LFormattedText::AddSourceObject( // get its width & neight, as they might be in % of our main width, that // we don't know yet (but only when ->Format() is called). lvtextAddSourceObject(m_pbuffer, 0, 0, - flags, interval, valign_dy, indent, object, letter_spacing ); + flags, interval, valign_dy, indent, object, lang_cfg, letter_spacing ); // lvtextAddSourceObject will itself add to flags: | LTEXT_SRC_IS_OBJECT return; } @@ -368,7 +378,7 @@ void LFormattedText::AddSourceObject( height = h; lvtextAddSourceObject(m_pbuffer, width, height, - flags, interval, valign_dy, indent, object, letter_spacing ); + flags, interval, valign_dy, indent, object, lang_cfg, letter_spacing ); } class LVFormatter { @@ -380,6 +390,9 @@ class LVFormatter { int m_size; bool m_staticBufs; static bool m_staticBufs_inUse; + #if (USE_LIBUNIBREAK==1) + static bool m_libunibreak_init_done; + #endif lChar16 * m_text; lUInt16 * m_flags; src_text_fragment_t * * m_srcs; @@ -421,6 +434,13 @@ class LVFormatter { LVFormatter(formatted_text_fragment_t * pbuffer) : m_pbuffer(pbuffer), m_length(0), m_size(0), m_staticBufs(true), m_y(0) { + #if (USE_LIBUNIBREAK==1) + if 
(!m_libunibreak_init_done) { + m_libunibreak_init_done = true; + // Have libunibreak build up a few lookup tables for quicker computation + init_linebreak(); + } + #endif if (m_staticBufs_inUse) m_staticBufs = false; m_text = NULL; @@ -902,6 +922,17 @@ class LVFormatter { /// copy text of current paragraph to buffers void copyText( int start, int end ) { + #if (USE_LIBUNIBREAK==1) + struct LineBreakContext lbCtx; + // Let's init it before the first char, by adding a leading space which + // will be treated as WJ (Word Joiner, non-breakable) and should not + // change the behaviour with the real first char coming up. We then + // can just use lb_process_next_char() with the real text. + // The lang lb_props will be plugged in from the TextLangCfg of the + // coming up text node. + lb_init_break_context(&lbCtx, 0x0020, NULL); + #endif + m_has_bidi = false; // will be set if fribidi detects it is bidirectionnal text m_para_dir_is_rtl = false; bool has_rtl = false; // if no RTL char, no need for expensive bidi processing @@ -940,6 +971,7 @@ class LVFormatter { // to change what we did for floats to use a new flag. pos++; // No need to update prev_was_space or last_non_space_pos + // No need for libunibreak object replacement character } else if ( src->flags & LTEXT_SRC_IS_INLINE_BOX ) { // Note: we shouldn't meet any EmbeddedBlock inlineBox here (and in @@ -947,7 +979,20 @@ class LVFormatter { // with specifically in splitParagraphs() by processEmbeddedBlock(). m_text[pos] = 0; m_srcs[pos] = src; - m_flags[pos] = LCHAR_IS_OBJECT | LCHAR_ALLOW_WRAP_AFTER; + m_flags[pos] = LCHAR_IS_OBJECT; + #if (USE_LIBUNIBREAK==1) + // Let libunibreak know there was an object, for the followup text + // to set LCHAR_ALLOW_WRAP_AFTER on it. + // (it will allow wrap before and after an object, unless it's near + // some punctuation/quote/paren, whose rules will be ensured it seems). 
+ int brk = lb_process_next_char(&lbCtx, (utf32_t)0xFFFC); // OBJECT REPLACEMENT CHARACTER + if (brk == LINEBREAK_ALLOWBREAK) + m_flags[pos-1] |= LCHAR_ALLOW_WRAP_AFTER; + else + m_flags[pos-1] &= ~LCHAR_ALLOW_WRAP_AFTER; + #else + m_flags[pos] |= LCHAR_ALLOW_WRAP_AFTER; + #endif m_charindex[pos] = INLINEBOX_CHAR_INDEX; //0xFFFD; last_non_space_pos = pos; prev_was_space = false; @@ -956,13 +1001,29 @@ class LVFormatter { else if ( src->flags & LTEXT_SRC_IS_OBJECT ) { m_text[pos] = 0; m_srcs[pos] = src; - m_flags[pos] = LCHAR_IS_OBJECT | LCHAR_ALLOW_WRAP_AFTER; + m_flags[pos] = LCHAR_IS_OBJECT; + #if (USE_LIBUNIBREAK==1) + // Let libunibreak know there was an object + int brk = lb_process_next_char(&lbCtx, (utf32_t)0xFFFC); // OBJECT REPLACEMENT CHARACTER + if (brk == LINEBREAK_ALLOWBREAK) + m_flags[pos-1] |= LCHAR_ALLOW_WRAP_AFTER; + else + m_flags[pos-1] &= ~LCHAR_ALLOW_WRAP_AFTER; + #else + m_flags[pos] |= LCHAR_ALLOW_WRAP_AFTER; + #endif m_charindex[pos] = OBJECT_CHAR_INDEX; //0xFFFF; last_non_space_pos = pos; prev_was_space = false; pos++; } else { + #if (USE_LIBUNIBREAK==1) + // We hack into lbCtx private member and switch its lbpLang + // on-the-fly to the props for a possibly new language. + lbCtx.lbpLang = src->lang_cfg->getLBProps(); + #endif + int len = src->t.len; lStr_ncpy( m_text+pos, src->t.text, len ); if ( i==0 || (src->flags & LTEXT_FLAG_NEWLINE) ) @@ -1028,9 +1089,12 @@ class LVFormatter { lChar16 c = m_text[pos]; bool is_space = (c == ' '); - if ( is_space && prev_was_space && !preformatted ) { + if ( is_space && prev_was_space && !preformatted && src->object ) { // On non-pre paragraphs, flag spaces following a space // so we can discard them later. + // (But only if the space is from a document text node (it then + // has a non-NULL ->object), to keep those we added for empty + // lines or indentation with 'txform->AddSourceLine(L" "...)'.) 
m_flags[pos] = LCHAR_IS_COLLAPSED_SPACE | LCHAR_ALLOW_WRAP_AFTER; // m_text[pos] = '_'; // uncomment when debugging // (We can replace the char to see it in printf() (m_text is not the @@ -1099,6 +1163,58 @@ class LVFormatter { printf("control char %x\n", c); */ + #if (USE_LIBUNIBREAK==1) + lChar16 ch = m_text[pos]; + if ( src->lang_cfg->hasLBCharSubFunc() ) { + // Lang specific function may want to substitute char (for + // libunibreak only) to tweak line breaking around it + ch = src->lang_cfg->getLBCharSubFunc()(m_text, pos, len-1 - k); + } + int brk = lb_process_next_char(&lbCtx, (utf32_t)ch); + // printf("between <%c%c>: brk %d\n", m_text[pos-1], m_text[pos], brk); + if (brk != LINEBREAK_ALLOWBREAK) { + m_flags[pos-1] &= ~LCHAR_ALLOW_WRAP_AFTER; + } + else { + m_flags[pos-1] |= LCHAR_ALLOW_WRAP_AFTER; + // brk is set on the last space in a sequence of multiple spaces. + // between : brk 2 + // between : brk 2 + // between : brk 2 + // between <. >: brk 2 + // between < >: brk 2 + // between < >: brk 2 + // between < T>: brk 1 + // between : brk 2 + // between : brk 2 + // between : brk 2 + // between : brk 2 + // between < >: brk 2 + // between < h>: brk 1 + // between : brk 2 + // between : brk 2 + // between : brk 2 + // between : brk 2 + // between < a>: brk 1 + // between : brk 2 + // Given the algorithm described in addLine(), we want the break + // after the first space, so the following collapsed spaces can + // be at start of next line where they will be ignored. + // (Not certain this is really needed, but let's do it, as the + // code expecting that has been quite well tested and fixed over + // the months, so let's avoid adding uncertainty.) + if ( m_flags[pos-1] & LCHAR_IS_COLLAPSED_SPACE ) { + // We have spaces before, and if we are allowed to break, + // the break is allowed on all preceding spaces. 
+ int j = pos-2; + while ( j >= 0 && ( (m_flags[j] & LCHAR_IS_COLLAPSED_SPACE) || m_text[j] == ' ' ) ) { + m_flags[j] |= LCHAR_ALLOW_WRAP_AFTER; + j--; + } + } + } + #endif + #if (USE_FRIBIDI==1) // Also try to detect if we have RTL chars, so that if we don't have any, // we don't need to invoke expensive fribidi processing below (which @@ -1349,6 +1465,7 @@ class LVFormatter { widths, flags, 0x7FFF, '?', + srcline->lang_cfg, srcline->letter_spacing, false, hints ); @@ -1495,6 +1612,7 @@ class LVFormatter { widths, flags, 0x7FFF, //pbuffer->width, '?', + lastSrc->lang_cfg, lastLetterSpacing, false, hints @@ -1557,6 +1675,12 @@ class LVFormatter { widths[k] -= cumulative_width_removed; } m_widths[start + k] = lastWidth + widths[k]; + #if (USE_LIBUNIBREAK==1) + // Reset these flags if lastFont->measureText() has set them, as we trust + // only libunibreak (which is more clever with hyphens, that our code flag + // with LCHAR_DEPRECATED_WRAP_AFTER). + flags[k] &= ~(LCHAR_ALLOW_WRAP_AFTER|LCHAR_DEPRECATED_WRAP_AFTER); + #endif m_flags[start + k] |= flags[k]; // printf(" => w=%d\n", m_widths[start + k]); } @@ -2968,7 +3092,9 @@ class LVFormatter { // split paragraph into lines, export lines int pos = 0; + #if (USE_LIBUNIBREAK!=1) int upSkipPos = -1; + #endif // int minWidth = 0; // Not per-specs, but when floats reduce the available width, skip y until @@ -3129,7 +3255,7 @@ class LVFormatter { // but it should be a candidate for lastNormalWrap (otherwise, the // previous word will be hyphenated and we will get spaces widen for // text justification) - if ( (flags & LCHAR_ALLOW_WRAP_AFTER) && !(flags & LCHAR_IS_OBJECT) ) // don't break yet + if ( (flags & LCHAR_IS_SPACE) && (flags & LCHAR_ALLOW_WRAP_AFTER) ) // don't break yet grabbedExceedingSpace = true; else break; @@ -3140,6 +3266,12 @@ class LVFormatter { // || lGetCharProps(m_text[i]) == 0 // but this does not look right, as any other unicode char would allow wrap. 
// + #if (USE_LIBUNIBREAK==1) + // Note: with libunibreak, we can't assume anymore that LCHAR_ALLOW_WRAP_AFTER is synonym to IS_SPACE. + if (flags & LCHAR_ALLOW_WRAP_AFTER) { + lastNormalWrap = i; + } + #else // A space or a CJK ideograph make a normal allowed wrap if ((flags & LCHAR_ALLOW_WRAP_AFTER) || isCJKIdeograph(m_text[i])) { // Need to check if previous and next non-space char request a wrap on @@ -3181,12 +3313,13 @@ class LVFormatter { // Note that a wrap can happen AFTER a '-' (that has CH_PROP_AVOID_WRAP_AFTER) // when lastDeprecatedWrap is prefered below. } + #endif // not USE_LIBUNIBREAK==1 else if ( i==m_length-1 ) // Last char lastNormalWrap = i; else if ( flags & LCHAR_DEPRECATED_WRAP_AFTER ) // Hyphens make a less priority wrap lastDeprecatedWrap = i; else if ( flags & LCHAR_ALLOW_HYPH_WRAP_AFTER ) // can't happen at this point as we haven't - lastHyphWrap = i; // gone thru HyphMan::hyphenate() + lastHyphWrap = i; // gone thru hyphenate() if ( !grabbedExceedingSpace && m_pbuffer->min_space_condensing_percent != 100 && i < m_length-1 && @@ -3280,7 +3413,7 @@ class LVFormatter { // We have a valid word to look for hyphenation if ( len > MAX_WORD_SIZE ) // hyphenate() stops/truncates at 64 chars len = MAX_WORD_SIZE; - // HyphMan::hyphenate(), which is used by some other parts of the code, + // ->hyphenate(), which is used by some other parts of the code, // expects a lUInt8 array. We added flagSize=1|2 so it can set the correct // flags on our upgraded (from lUInt8 to lUInt16) m_flags. 
lUInt8 * flags = (lUInt8*) (m_flags + wstart); @@ -3303,7 +3436,8 @@ class LVFormatter { break; } } - if ( HyphMan::hyphenate(m_text+wstart, len, widths, flags, _hyphen_width, max_width, 2) ) { + // Use the hyph method of the source node that contains wordpos + if ( m_srcs[wordpos]->lang_cfg->getHyphMethod()->hyphenate(m_text+wstart, len, widths, flags, _hyphen_width, max_width, 2) ) { // We need to reset the flag for the multiple hyphenation // opportunities we will not be using (or they could cause // spurious spaces, as a word here may be multiple words @@ -3347,6 +3481,7 @@ class LVFormatter { wrapPos = lastNormalWrap; if ( wrapPos<0 ) wrapPos = i-1; + #if (USE_LIBUNIBREAK!=1) if ( wrapPos<=upSkipPos ) { // Ensure that what, when dealing with previous line, we pushed to // next line (below) is actually on this new line. @@ -3355,9 +3490,14 @@ class LVFormatter { //CRLog::trace("guard new wrapPos at %d", wrapPos); upSkipPos = -1; } + #endif } bool needReduceSpace = true; // todo: calculate whether space reducing required int endp = wrapPos+(lastMandatoryWrap<0 ? 1 : 0); + + // Specific handling of CJK punctuation that should not happen at start or + // end of line. When using libunibreak, we trust it to handle them correctly. + #if (USE_LIBUNIBREAK!=1) // The following looks left (up) and right (down) if there are any chars/punctuation // that should be prevented from being at the end of line or start of line, and if // yes adjust wrapPos so they are pushed to next line, or brought to this line. @@ -3404,6 +3544,8 @@ class LVFormatter { //CRLog::trace("finally up skip punctuations %d", upSkipCount); } } + #endif + // Best position to end this line found. 
// We need to possibly extend the last char width to account for italic // right side bearing overflow (but not if we ended the line with some @@ -3435,6 +3577,16 @@ class LVFormatter { endp = m_length; addLine(pos, endp, x + firstCharMargin, para, interval, pos==0, wrapPos>=m_length-1, preFormattedOnly, needReduceSpace, isLastPara); pos = wrapPos + 1; + #if (USE_LIBUNIBREAK==1) + // (Only when using libunibreak, which we trust decisions to wrap on hyphens.) + if ( m_srcs[wrapPos]->lang_cfg->duplicateRealHyphenOnNextLine() && pos > 0 && pos < m_length-1 ) { + if ( m_text[wrapPos] == '-' || m_text[wrapPos] == UNICODE_HYPHEN ) { + pos--; // Have that last hyphen also at the start of next line + // (small caveat: the duplicated hyphen at start of next + // line won't be part of the highlighted text) + } + } + #endif } } @@ -3643,6 +3795,9 @@ class LVFormatter { }; bool LVFormatter::m_staticBufs_inUse = false; +#if (USE_LIBUNIBREAK==1) +bool LVFormatter::m_libunibreak_init_done = false; +#endif static void freeFrmLines( formatted_text_fragment_t * m_pbuffer ) { @@ -4115,6 +4270,7 @@ void LFormattedText::Draw( LVDrawBuf * buf, int x, int y, ldomMarkedRangeList * '?', NULL, flgHyphen, + srcline->lang_cfg, drawFlags, srcline->letter_spacing, word->width, diff --git a/crengine/src/lvtinydom.cpp b/crengine/src/lvtinydom.cpp index 9c7488bc6..c1db25355 100644 --- a/crengine/src/lvtinydom.cpp +++ b/crengine/src/lvtinydom.cpp @@ -84,9 +84,9 @@ int gDOMVersionRequested = DOM_VERSION_CURRENT; /// change in case of incompatible changes in swap/cache file format to avoid using incompatible swap file // increment to force complete reload/reparsing of old file -#define CACHE_FILE_FORMAT_VERSION "3.05.38k" +#define CACHE_FILE_FORMAT_VERSION "3.05.39k" /// increment following value to force re-formatting of old book after load -#define FORMATTING_VERSION_ID 0x0021 +#define FORMATTING_VERSION_ID 0x0022 #ifndef DOC_DATA_COMPRESSION_LEVEL /// data compression level (0=no compression, 
1=fast compressions, 3=normal compression) @@ -387,7 +387,7 @@ lUInt32 calcGlobalSettingsHash(int documentId) hash = hash * 75 + 2384761; if ( gFlgFloatingPunctuationEnabled ) hash = hash * 75 + 1761; - hash = hash * 31 + (HyphMan::getSelectedDictionary()!=NULL ? HyphMan::getSelectedDictionary()->getHash() : 123 ); + hash = hash * 31 + TextLangMan::getHash(); hash = hash * 31 + HyphMan::getLeftHyphenMin(); hash = hash * 31 + HyphMan::getRightHyphenMin(); hash = hash * 31 + HyphMan::getTrustSoftHyphens(); @@ -1700,6 +1700,31 @@ void RenderRectAccessor::setListPropNodeIndex( int idx ) _modified = true; } } +int RenderRectAccessor::getLangNodeIndex() +{ + if ( _dirty ) { + _dirty = false; + _node->getRenderData(*this); +#ifdef DEBUG_RENDER_RECT_ACCESS + rr_lock( _node ); +#endif + } + return _lang_node_idx; +} +void RenderRectAccessor::setLangNodeIndex( int idx ) +{ + if ( _dirty ) { + _dirty = false; + _node->getRenderData(*this); +#ifdef DEBUG_RENDER_RECT_ACCESS + rr_lock( _node ); +#endif + } + if ( _lang_node_idx != idx ) { + _lang_node_idx = idx; + _modified = true; + } +} unsigned short RenderRectAccessor::getFlags() { if ( _dirty ) { @@ -3797,8 +3822,10 @@ static void writeNodeEx( LVStream * stream, ldomNode * node, lString16Collection // We have a valid word to look for hyphenation if ( len > HYPH_MAX_WORD_SIZE ) // hyphenate() stops/truncates at 64 chars len = HYPH_MAX_WORD_SIZE; - // Have HyphMan set flags inside 'flags' - HyphMan::hyphenate(text16+start, len, widths, flags+start, 0, 0xFFFF, 1); + // Have hyphenate() set flags inside 'flags' + // (Fetching the lang_cfg for each text node is not really cheap, but + // it's easier than having to pass it to each writeNodeEx()) + TextLangMan::getTextLangCfg(node)->getHyphMethod()->hyphenate(text16+start, len, widths, flags+start, 0, 0xFFFF, 1); // Continue with previous word wordpos = start - 1; } @@ -5940,12 +5967,13 @@ void ldomNode::initNodeRendMethod() } j++; // j..i are inline - if ( j>0 || 
i<(int)getChildCount()-1 ) + if ( j>0 || i<(int)getChildCount()-1 ) { // Avoid crash: we can't add/move nodes when a cache file exists if ( getDocument()->hasCacheFile() ) getDocument()->setBoxingWishedButPreventedByCache(); else autoboxChildren( j, i, handleFloating ); + } i = j; } else if ( i>0 ) { @@ -7728,7 +7756,7 @@ ldomXPointer ldomDocument::createXPointer( lvPoint pt, int direction, bool stric lUInt32 hints = WORD_FLAGS_TO_FNT_FLAGS(word->flags); font->measureText( str.c_str()+word->t.start, word->t.len, width, flg, - word->width+50, '?', src->letter_spacing, false, hints); + word->width+50, '?', src->lang_cfg, src->letter_spacing, false, hints); bool word_is_rtl = word->flags & LTEXT_WORD_DIRECTION_IS_RTL; if ( word_is_rtl ) { @@ -8080,6 +8108,7 @@ bool ldomXPointer::getRect(lvRect & rect, bool extended, bool adjusted) const flg, word->width+50, '?', + txtform->GetSrcInfo(srcIndex)->lang_cfg, txtform->GetSrcInfo(srcIndex)->letter_spacing, false, hints); @@ -8248,6 +8277,7 @@ bool ldomXPointer::getRect(lvRect & rect, bool extended, bool adjusted) const flg, word->width+50, '?', + txtform->GetSrcInfo(srcIndex)->lang_cfg, txtform->GetSrcInfo(srcIndex)->letter_spacing, false, hints ); @@ -8719,7 +8749,7 @@ lString16 ldomXPointer::toStringV2() // same element name, so we can have "div[1]" instead of "div" // when parent has more than one of it (as toStringV1 does). ldomNode * n = p; - while ( n = n->getUnboxedNextSibling(true) ) { + while ( (n = n->getUnboxedNextSibling(true)) ) { if ( predicat(n) ) { // We have such a followup sibling count = 2; // there's at least 2 of them break; @@ -8741,7 +8771,7 @@ lString16 ldomXPointer::toStringV2() // so we can have "text()[1]" instead of "text()" when // parent has more than one text node (as toStringV1 does). 
ldomNode * n = p; - while ( n = n->getUnboxedNextSibling(false) ) { + while ( (n = n->getUnboxedNextSibling(false)) ) { if ( isTextNode(n) ) { // We have such a followup sibling count = 2; // there's at least 2 of them break; @@ -9989,9 +10019,10 @@ void ldomXRange::getSegmentRects( LVArray & rects ) curPos.setOffset(startOffset); prevCharRect = nodeStartRect; for (int i=startOffset+1; i<=textLen-1; i++) { - // skip spaces and soft-hyphens + // skip spaces (but let soft-hyphens in, so they are part of the + // highlight when they are shown at end of line) lChar16 c = nodeText[i]; - if (c == ' ' || c == 0x00AD) + if (c == ' ') // || c == 0x00AD) continue; curPos.setOffset(i); curCharRect = lvRect(); // reset @@ -13008,7 +13039,6 @@ lUInt32 tinyNodeCollection::calcStyleHash() { CRLog::debug("calcStyleHash start"); // int maxlog = 20; - int count = ((_elemCount+TNC_PART_LEN-1) >> TNC_PART_SHIFT); lUInt32 res = 0; //_elemCount; lUInt32 globalHash = calcGlobalSettingsHash(getFontContextDocIndex()); lUInt32 docFlags = getDocFlags(); @@ -13028,6 +13058,7 @@ lUInt32 tinyNodeCollection::calcStyleHash() // we should invalidate the cache so a new correct DOM is build on load. 
_nodeDisplayStyleHash = 0; + int count = ((_elemCount+TNC_PART_LEN-1) >> TNC_PART_SHIFT); for ( int i=0; igetTextWidth((marker + " ").c_str(), marker.length()+2) + font->getSize()/8; + TextLangCfg * lang_cfg = TextLangMan::getTextLangCfg( this ); + markerWidth = font->getTextWidth((marker + " ").c_str(), marker.length()+2, lang_cfg) + font->getSize()/8; res = true; } else { marker.clear(); @@ -16232,7 +16264,18 @@ LVStreamRef ldomDocument::getObjectImageStream( lString16 refName ) LVStreamRef ref; if ( refName.startsWith(lString16(BLOB_NAME_PREFIX)) ) { return _blobCache.getBlob(refName); - } if ( refName[0]!='#' ) { + } + if ( refName.length() > 10 && refName[4] == ':' && refName.startsWith(lString16("data:image/")) ) { + // 0 ) { + lString8 b64data = UnicodeToLocal(refName.substr(pos+8)); + ref = LVStreamRef(new LVBase64Stream(b64data)); + return ref; + } + } + if ( refName[0]!='#' ) { if ( !getContainer().isNull() ) { lString16 name = refName; if ( !getCodeBase().empty() ) @@ -16358,7 +16401,9 @@ int ldomNode::renderFinalBlock( LFormattedTextRef & frmtext, RenderRectAccessor /// render whole node content as single formatted object int direction = RENDER_RECT_PTR_GET_DIRECTION(fmt); int flags = styleToTextFmtFlags( getStyle(), 0, direction ); - ::renderFinalBlock( this, f.get(), fmt, flags, 0, -1 ); + int lang_node_idx = fmt->getLangNodeIndex(); + TextLangCfg * lang_cfg = TextLangMan::getTextLangCfg(lang_node_idx>0 ? 
getDocument()->getTinyNode(lang_node_idx) : NULL); + ::renderFinalBlock( this, f.get(), fmt, flags, 0, -1, lang_cfg ); cache.set( this, f ); bool flg=gFlgFloatingPunctuationEnabled; if (this->getNodeName()=="th"||this->getNodeName()=="td"|| diff --git a/crengine/src/lvxml.cpp b/crengine/src/lvxml.cpp index 363490f6c..a8f49ef5c 100644 --- a/crengine/src/lvxml.cpp +++ b/crengine/src/lvxml.cpp @@ -3164,262 +3164,2138 @@ bool LVXMLParser::Parse() typedef struct { const wchar_t * name; wchar_t code; + wchar_t code2; } ent_def_t; +// From https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references +// Also see https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references +// Note that some entities translate to 2 codepoints, so code2=0 for those that do not. static const ent_def_t def_entity_table[] = { -{L"nbsp", 160}, -{L"iexcl", 161}, -{L"cent", 162}, -{L"pound", 163}, -{L"curren", 164}, -{L"yen", 165}, -{L"brvbar", 166}, -{L"sect", 167}, -{L"uml", 168}, -{L"copy", 169}, -{L"ordf", 170}, -{L"laquo", 171}, -{L"not", 172}, -{L"shy", 173}, -{L"reg", 174}, -{L"macr", 175}, -{L"deg", 176}, -{L"plusmn", 177}, -{L"sup2", 178}, -{L"sup3", 179}, -{L"acute", 180}, -{L"micro", 181}, -{L"para", 182}, -{L"middot", 183}, -{L"cedil", 184}, -{L"sup1", 185}, -{L"ordm", 186}, -{L"raquo", 187}, -{L"frac14", 188}, -{L"frac12", 189}, -{L"frac34", 190}, -{L"iquest", 191}, -{L"Agrave", 192}, -{L"Aacute", 193}, -{L"Acirc", 194}, -{L"Atilde", 195}, -{L"Auml", 196}, -{L"Aring", 197}, -{L"AElig", 198}, -{L"Ccedil", 199}, -{L"Egrave", 200}, -{L"Eacute", 201}, -{L"Ecirc", 202}, -{L"Euml", 203}, -{L"Igrave", 204}, -{L"Iacute", 205}, -{L"Icirc", 206}, -{L"Iuml", 207}, -{L"ETH", 208}, -{L"Ntilde", 209}, -{L"Ograve", 210}, -{L"Oacute", 211}, -{L"Ocirc", 212}, -{L"Otilde", 213}, -{L"Ouml", 214}, -{L"times", 215}, -{L"Oslash", 216}, -{L"Ugrave", 217}, -{L"Uacute", 218}, -{L"Ucirc", 219}, -{L"Uuml", 220}, -{L"Yacute", 221}, -{L"THORN", 222}, -{L"szlig", 223}, 
-{L"agrave", 224}, -{L"aacute", 225}, -{L"acirc", 226}, -{L"atilde", 227}, -{L"auml", 228}, -{L"aring", 229}, -{L"aelig", 230}, -{L"ccedil", 231}, -{L"egrave", 232}, -{L"eacute", 233}, -{L"ecirc", 234}, -{L"euml", 235}, -{L"igrave", 236}, -{L"iacute", 237}, -{L"icirc", 238}, -{L"iuml", 239}, -{L"eth", 240}, -{L"ntilde", 241}, -{L"ograve", 242}, -{L"oacute", 243}, -{L"ocirc", 244}, -{L"otilde", 245}, -{L"ouml", 246}, -{L"divide", 247}, -{L"oslash", 248}, -{L"ugrave", 249}, -{L"uacute", 250}, -{L"ucirc", 251}, -{L"uuml", 252}, -{L"yacute", 253}, -{L"thorn", 254}, -{L"yuml", 255}, -{L"quot", 34}, -{L"amp", 38}, -{L"lt", 60}, -{L"gt", 62}, -{L"apos", '\''}, -{L"OElig", 338}, -{L"oelig", 339}, -{L"Scaron", 352}, -{L"scaron", 353}, -{L"Yuml", 376}, -{L"circ", 710}, -{L"tilde", 732}, -{L"ensp", 8194}, -{L"emsp", 8195}, -{L"thinsp", 8201}, -{L"zwnj", 8204}, -{L"zwj", 8205}, -{L"lrm", 8206}, -{L"rlm", 8207}, -{L"ndash", 8211}, -{L"mdash", 8212}, -{L"lsquo", 8216}, -{L"rsquo", 8217}, -{L"sbquo", 8218}, -{L"ldquo", 8220}, -{L"rdquo", 8221}, -{L"bdquo", 8222}, -{L"dagger", 8224}, -{L"Dagger", 8225}, -{L"permil", 8240}, -{L"lsaquo", 8249}, -{L"rsaquo", 8250}, -{L"euro", 8364}, -{L"fnof", 402}, -{L"Alpha", 913}, -{L"Beta", 914}, -{L"Gamma", 915}, -{L"Delta", 916}, -{L"Epsilon", 917}, -{L"Zeta", 918}, -{L"Eta", 919}, -{L"Theta", 920}, -{L"Iota", 921}, -{L"Kappa", 922}, -{L"Lambda", 923}, -{L"Mu", 924}, -{L"Nu", 925}, -{L"Xi", 926}, -{L"Omicron", 927}, -{L"Pi", 928}, -{L"Rho", 929}, -{L"Sigma", 931}, -{L"Tau", 932}, -{L"Upsilon", 933}, -{L"Phi", 934}, -{L"Chi", 935}, -{L"Psi", 936}, -{L"Omega", 937}, -{L"alpha", 945}, -{L"beta", 946}, -{L"gamma", 947}, -{L"delta", 948}, -{L"epsilon", 949}, -{L"zeta", 950}, -{L"eta", 951}, -{L"theta", 952}, -{L"iota", 953}, -{L"kappa", 954}, -{L"lambda", 955}, -{L"mu", 956}, -{L"nu", 957}, -{L"xi", 958}, -{L"omicron", 959}, -{L"pi", 960}, -{L"rho", 961}, -{L"sigmaf", 962}, -{L"sigma", 963}, -{L"tau", 964}, -{L"upsilon", 965}, -{L"phi", 966}, 
-{L"chi", 967}, -{L"psi", 968}, -{L"omega", 969}, -{L"thetasym", 977}, -{L"upsih", 978}, -{L"piv", 982}, -{L"bull", 8226}, -{L"hellip", 8230}, -{L"prime", 8242}, -{L"Prime", 8243}, -{L"oline", 8254}, -{L"frasl", 8260}, -{L"weierp", 8472}, -{L"image", 8465}, -{L"real", 8476}, -{L"trade", 8482}, -{L"alefsym", 8501}, -{L"larr", 8592}, -{L"uarr", 8593}, -{L"rarr", 8594}, -{L"darr", 8595}, -{L"harr", 8596}, -{L"crarr", 8629}, -{L"lArr", 8656}, -{L"uArr", 8657}, -{L"rArr", 8658}, -{L"dArr", 8659}, -{L"hArr", 8660}, -{L"forall", 8704}, -{L"part", 8706}, -{L"exist", 8707}, -{L"empty", 8709}, -{L"nabla", 8711}, -{L"isin", 8712}, -{L"notin", 8713}, -{L"ni", 8715}, -{L"prod", 8719}, -{L"sum", 8721}, -{L"minus", 8722}, -{L"lowast", 8727}, -{L"radic", 8730}, -{L"prop", 8733}, -{L"infin", 8734}, -{L"ang", 8736}, -{L"and", 8743}, -{L"or", 8744}, -{L"cap", 8745}, -{L"cup", 8746}, -{L"int", 8747}, -{L"there4", 8756}, -{L"sim", 8764}, -{L"cong", 8773}, -{L"asymp", 8776}, -{L"ne", 8800}, -{L"equiv", 8801}, -{L"le", 8804}, -{L"ge", 8805}, -{L"sub", 8834}, -{L"sup", 8835}, -{L"nsub", 8836}, -{L"sube", 8838}, -{L"supe", 8839}, -{L"oplus", 8853}, -{L"otimes", 8855}, -{L"perp", 8869}, -{L"sdot", 8901}, -{L"lceil", 8968}, -{L"rceil", 8969}, -{L"lfloor", 8970}, -{L"rfloor", 8971}, -{L"lang", 9001}, -{L"rang", 9002}, -{L"loz", 9674}, -{L"spades", 9824}, -{L"clubs", 9827}, -{L"hearts", 9829}, -{L"diams", 9830}, +{L"AElig", 198, 0}, +{L"AMP", 38, 0}, +{L"Aacute", 193, 0}, +{L"Abreve", 258, 0}, +{L"Acirc", 194, 0}, +{L"Acy", 1040, 0}, +{L"Afr", 120068, 0}, +{L"Agrave", 192, 0}, +{L"Alpha", 913, 0}, +{L"Amacr", 256, 0}, +{L"And", 10835, 0}, +{L"Aogon", 260, 0}, +{L"Aopf", 120120, 0}, +{L"ApplyFunction", 8289, 0}, +{L"Aring", 197, 0}, +{L"Ascr", 119964, 0}, +{L"Assign", 8788, 0}, +{L"Atilde", 195, 0}, +{L"Auml", 196, 0}, +{L"Backslash", 8726, 0}, +{L"Barv", 10983, 0}, +{L"Barwed", 8966, 0}, +{L"Bcy", 1041, 0}, +{L"Because", 8757, 0}, +{L"Bernoullis", 8492, 0}, +{L"Beta", 914, 0}, +{L"Bfr", 
120069, 0}, +{L"Bopf", 120121, 0}, +{L"Breve", 728, 0}, +{L"Bscr", 8492, 0}, +{L"Bumpeq", 8782, 0}, +{L"CHcy", 1063, 0}, +{L"COPY", 169, 0}, +{L"Cacute", 262, 0}, +{L"Cap", 8914, 0}, +{L"CapitalDifferentialD", 8517, 0}, +{L"Cayleys", 8493, 0}, +{L"Ccaron", 268, 0}, +{L"Ccedil", 199, 0}, +{L"Ccirc", 264, 0}, +{L"Cconint", 8752, 0}, +{L"Cdot", 266, 0}, +{L"Cedilla", 184, 0}, +{L"CenterDot", 183, 0}, +{L"Cfr", 8493, 0}, +{L"Chi", 935, 0}, +{L"CircleDot", 8857, 0}, +{L"CircleMinus", 8854, 0}, +{L"CirclePlus", 8853, 0}, +{L"CircleTimes", 8855, 0}, +{L"ClockwiseContourIntegral", 8754, 0}, +{L"CloseCurlyDoubleQuote", 8221, 0}, +{L"CloseCurlyQuote", 8217, 0}, +{L"Colon", 8759, 0}, +{L"Colone", 10868, 0}, +{L"Congruent", 8801, 0}, +{L"Conint", 8751, 0}, +{L"ContourIntegral", 8750, 0}, +{L"Copf", 8450, 0}, +{L"Coproduct", 8720, 0}, +{L"CounterClockwiseContourIntegral", 8755, 0}, +{L"Cross", 10799, 0}, +{L"Cscr", 119966, 0}, +{L"Cup", 8915, 0}, +{L"CupCap", 8781, 0}, +{L"DD", 8517, 0}, +{L"DDotrahd", 10513, 0}, +{L"DJcy", 1026, 0}, +{L"DScy", 1029, 0}, +{L"DZcy", 1039, 0}, +{L"Dagger", 8225, 0}, +{L"Darr", 8609, 0}, +{L"Dashv", 10980, 0}, +{L"Dcaron", 270, 0}, +{L"Dcy", 1044, 0}, +{L"Del", 8711, 0}, +{L"Delta", 916, 0}, +{L"Dfr", 120071, 0}, +{L"DiacriticalAcute", 180, 0}, +{L"DiacriticalDot", 729, 0}, +{L"DiacriticalDoubleAcute", 733, 0}, +{L"DiacriticalGrave", 96, 0}, +{L"DiacriticalTilde", 732, 0}, +{L"Diamond", 8900, 0}, +{L"DifferentialD", 8518, 0}, +{L"Dopf", 120123, 0}, +{L"Dot", 168, 0}, +{L"DotDot", 8412, 0}, +{L"DotEqual", 8784, 0}, +{L"DoubleContourIntegral", 8751, 0}, +{L"DoubleDot", 168, 0}, +{L"DoubleDownArrow", 8659, 0}, +{L"DoubleLeftArrow", 8656, 0}, +{L"DoubleLeftRightArrow", 8660, 0}, +{L"DoubleLeftTee", 10980, 0}, +{L"DoubleLongLeftArrow", 10232, 0}, +{L"DoubleLongLeftRightArrow", 10234, 0}, +{L"DoubleLongRightArrow", 10233, 0}, +{L"DoubleRightArrow", 8658, 0}, +{L"DoubleRightTee", 8872, 0}, +{L"DoubleUpArrow", 8657, 0}, +{L"DoubleUpDownArrow", 8661, 0}, 
+{L"DoubleVerticalBar", 8741, 0}, +{L"DownArrow", 8595, 0}, +{L"DownArrowBar", 10515, 0}, +{L"DownArrowUpArrow", 8693, 0}, +{L"DownBreve", 785, 0}, +{L"DownLeftRightVector", 10576, 0}, +{L"DownLeftTeeVector", 10590, 0}, +{L"DownLeftVector", 8637, 0}, +{L"DownLeftVectorBar", 10582, 0}, +{L"DownRightTeeVector", 10591, 0}, +{L"DownRightVector", 8641, 0}, +{L"DownRightVectorBar", 10583, 0}, +{L"DownTee", 8868, 0}, +{L"DownTeeArrow", 8615, 0}, +{L"Downarrow", 8659, 0}, +{L"Dscr", 119967, 0}, +{L"Dstrok", 272, 0}, +{L"ENG", 330, 0}, +{L"ETH", 208, 0}, +{L"Eacute", 201, 0}, +{L"Ecaron", 282, 0}, +{L"Ecirc", 202, 0}, +{L"Ecy", 1069, 0}, +{L"Edot", 278, 0}, +{L"Efr", 120072, 0}, +{L"Egrave", 200, 0}, +{L"Element", 8712, 0}, +{L"Emacr", 274, 0}, +{L"EmptySmallSquare", 9723, 0}, +{L"EmptyVerySmallSquare", 9643, 0}, +{L"Eogon", 280, 0}, +{L"Eopf", 120124, 0}, +{L"Epsilon", 917, 0}, +{L"Equal", 10869, 0}, +{L"EqualTilde", 8770, 0}, +{L"Equilibrium", 8652, 0}, +{L"Escr", 8496, 0}, +{L"Esim", 10867, 0}, +{L"Eta", 919, 0}, +{L"Euml", 203, 0}, +{L"Exists", 8707, 0}, +{L"ExponentialE", 8519, 0}, +{L"Fcy", 1060, 0}, +{L"Ffr", 120073, 0}, +{L"FilledSmallSquare", 9724, 0}, +{L"FilledVerySmallSquare", 9642, 0}, +{L"Fopf", 120125, 0}, +{L"ForAll", 8704, 0}, +{L"Fouriertrf", 8497, 0}, +{L"Fscr", 8497, 0}, +{L"GJcy", 1027, 0}, +{L"GT", 62, 0}, +{L"Gamma", 915, 0}, +{L"Gammad", 988, 0}, +{L"Gbreve", 286, 0}, +{L"Gcedil", 290, 0}, +{L"Gcirc", 284, 0}, +{L"Gcy", 1043, 0}, +{L"Gdot", 288, 0}, +{L"Gfr", 120074, 0}, +{L"Gg", 8921, 0}, +{L"Gopf", 120126, 0}, +{L"GreaterEqual", 8805, 0}, +{L"GreaterEqualLess", 8923, 0}, +{L"GreaterFullEqual", 8807, 0}, +{L"GreaterGreater", 10914, 0}, +{L"GreaterLess", 8823, 0}, +{L"GreaterSlantEqual", 10878, 0}, +{L"GreaterTilde", 8819, 0}, +{L"Gscr", 119970, 0}, +{L"Gt", 8811, 0}, +{L"HARDcy", 1066, 0}, +{L"Hacek", 711, 0}, +{L"Hat", 94, 0}, +{L"Hcirc", 292, 0}, +{L"Hfr", 8460, 0}, +{L"HilbertSpace", 8459, 0}, +{L"Hopf", 8461, 0}, +{L"HorizontalLine", 9472, 0}, 
+{L"Hscr", 8459, 0}, +{L"Hstrok", 294, 0}, +{L"HumpDownHump", 8782, 0}, +{L"HumpEqual", 8783, 0}, +{L"IEcy", 1045, 0}, +{L"IJlig", 306, 0}, +{L"IOcy", 1025, 0}, +{L"Iacute", 205, 0}, +{L"Icirc", 206, 0}, +{L"Icy", 1048, 0}, +{L"Idot", 304, 0}, +{L"Ifr", 8465, 0}, +{L"Igrave", 204, 0}, +{L"Im", 8465, 0}, +{L"Imacr", 298, 0}, +{L"ImaginaryI", 8520, 0}, +{L"Implies", 8658, 0}, +{L"Int", 8748, 0}, +{L"Integral", 8747, 0}, +{L"Intersection", 8898, 0}, +{L"InvisibleComma", 8291, 0}, +{L"InvisibleTimes", 8290, 0}, +{L"Iogon", 302, 0}, +{L"Iopf", 120128, 0}, +{L"Iota", 921, 0}, +{L"Iscr", 8464, 0}, +{L"Itilde", 296, 0}, +{L"Iukcy", 1030, 0}, +{L"Iuml", 207, 0}, +{L"Jcirc", 308, 0}, +{L"Jcy", 1049, 0}, +{L"Jfr", 120077, 0}, +{L"Jopf", 120129, 0}, +{L"Jscr", 119973, 0}, +{L"Jsercy", 1032, 0}, +{L"Jukcy", 1028, 0}, +{L"KHcy", 1061, 0}, +{L"KJcy", 1036, 0}, +{L"Kappa", 922, 0}, +{L"Kcedil", 310, 0}, +{L"Kcy", 1050, 0}, +{L"Kfr", 120078, 0}, +{L"Kopf", 120130, 0}, +{L"Kscr", 119974, 0}, +{L"LJcy", 1033, 0}, +{L"LT", 60, 0}, +{L"Lacute", 313, 0}, +{L"Lambda", 923, 0}, +{L"Lang", 10218, 0}, +{L"Laplacetrf", 8466, 0}, +{L"Larr", 8606, 0}, +{L"Lcaron", 317, 0}, +{L"Lcedil", 315, 0}, +{L"Lcy", 1051, 0}, +{L"LeftAngleBracket", 10216, 0}, +{L"LeftArrow", 8592, 0}, +{L"LeftArrowBar", 8676, 0}, +{L"LeftArrowRightArrow", 8646, 0}, +{L"LeftCeiling", 8968, 0}, +{L"LeftDoubleBracket", 10214, 0}, +{L"LeftDownTeeVector", 10593, 0}, +{L"LeftDownVector", 8643, 0}, +{L"LeftDownVectorBar", 10585, 0}, +{L"LeftFloor", 8970, 0}, +{L"LeftRightArrow", 8596, 0}, +{L"LeftRightVector", 10574, 0}, +{L"LeftTee", 8867, 0}, +{L"LeftTeeArrow", 8612, 0}, +{L"LeftTeeVector", 10586, 0}, +{L"LeftTriangle", 8882, 0}, +{L"LeftTriangleBar", 10703, 0}, +{L"LeftTriangleEqual", 8884, 0}, +{L"LeftUpDownVector", 10577, 0}, +{L"LeftUpTeeVector", 10592, 0}, +{L"LeftUpVector", 8639, 0}, +{L"LeftUpVectorBar", 10584, 0}, +{L"LeftVector", 8636, 0}, +{L"LeftVectorBar", 10578, 0}, +{L"Leftarrow", 8656, 0}, +{L"Leftrightarrow", 
8660, 0}, +{L"LessEqualGreater", 8922, 0}, +{L"LessFullEqual", 8806, 0}, +{L"LessGreater", 8822, 0}, +{L"LessLess", 10913, 0}, +{L"LessSlantEqual", 10877, 0}, +{L"LessTilde", 8818, 0}, +{L"Lfr", 120079, 0}, +{L"Ll", 8920, 0}, +{L"Lleftarrow", 8666, 0}, +{L"Lmidot", 319, 0}, +{L"LongLeftArrow", 10229, 0}, +{L"LongLeftRightArrow", 10231, 0}, +{L"LongRightArrow", 10230, 0}, +{L"Longleftarrow", 10232, 0}, +{L"Longleftrightarrow", 10234, 0}, +{L"Longrightarrow", 10233, 0}, +{L"Lopf", 120131, 0}, +{L"LowerLeftArrow", 8601, 0}, +{L"LowerRightArrow", 8600, 0}, +{L"Lscr", 8466, 0}, +{L"Lsh", 8624, 0}, +{L"Lstrok", 321, 0}, +{L"Lt", 8810, 0}, +{L"Map", 10501, 0}, +{L"Mcy", 1052, 0}, +{L"MediumSpace", 8287, 0}, +{L"Mellintrf", 8499, 0}, +{L"Mfr", 120080, 0}, +{L"MinusPlus", 8723, 0}, +{L"Mopf", 120132, 0}, +{L"Mscr", 8499, 0}, +{L"Mu", 924, 0}, +{L"NJcy", 1034, 0}, +{L"Nacute", 323, 0}, +{L"Ncaron", 327, 0}, +{L"Ncedil", 325, 0}, +{L"Ncy", 1053, 0}, +{L"NegativeMediumSpace", 8203, 0}, +{L"NegativeThickSpace", 8203, 0}, +{L"NegativeThinSpace", 8203, 0}, +{L"NegativeVeryThinSpace", 8203, 0}, +{L"NestedGreaterGreater", 8811, 0}, +{L"NestedLessLess", 8810, 0}, +{L"NewLine", 10, 0}, +{L"Nfr", 120081, 0}, +{L"NoBreak", 8288, 0}, +{L"NonBreakingSpace", 160, 0}, +{L"Nopf", 8469, 0}, +{L"Not", 10988, 0}, +{L"NotCongruent", 8802, 0}, +{L"NotCupCap", 8813, 0}, +{L"NotDoubleVerticalBar", 8742, 0}, +{L"NotElement", 8713, 0}, +{L"NotEqual", 8800, 0}, +{L"NotEqualTilde", 8770, 824}, +{L"NotExists", 8708, 0}, +{L"NotGreater", 8815, 0}, +{L"NotGreaterEqual", 8817, 0}, +{L"NotGreaterFullEqual", 8807, 824}, +{L"NotGreaterGreater", 8811, 824}, +{L"NotGreaterLess", 8825, 0}, +{L"NotGreaterSlantEqual", 10878, 824}, +{L"NotGreaterTilde", 8821, 0}, +{L"NotHumpDownHump", 8782, 824}, +{L"NotHumpEqual", 8783, 824}, +{L"NotLeftTriangle", 8938, 0}, +{L"NotLeftTriangleBar", 10703, 824}, +{L"NotLeftTriangleEqual", 8940, 0}, +{L"NotLess", 8814, 0}, +{L"NotLessEqual", 8816, 0}, +{L"NotLessGreater", 8824, 0}, 
+{L"NotLessLess", 8810, 824}, +{L"NotLessSlantEqual", 10877, 824}, +{L"NotLessTilde", 8820, 0}, +{L"NotNestedGreaterGreater", 10914, 824}, +{L"NotNestedLessLess", 10913, 824}, +{L"NotPrecedes", 8832, 0}, +{L"NotPrecedesEqual", 10927, 824}, +{L"NotPrecedesSlantEqual", 8928, 0}, +{L"NotReverseElement", 8716, 0}, +{L"NotRightTriangle", 8939, 0}, +{L"NotRightTriangleBar", 10704, 824}, +{L"NotRightTriangleEqual", 8941, 0}, +{L"NotSquareSubset", 8847, 824}, +{L"NotSquareSubsetEqual", 8930, 0}, +{L"NotSquareSuperset", 8848, 824}, +{L"NotSquareSupersetEqual", 8931, 0}, +{L"NotSubset", 8834, 8402}, +{L"NotSubsetEqual", 8840, 0}, +{L"NotSucceeds", 8833, 0}, +{L"NotSucceedsEqual", 10928, 824}, +{L"NotSucceedsSlantEqual", 8929, 0}, +{L"NotSucceedsTilde", 8831, 824}, +{L"NotSuperset", 8835, 8402}, +{L"NotSupersetEqual", 8841, 0}, +{L"NotTilde", 8769, 0}, +{L"NotTildeEqual", 8772, 0}, +{L"NotTildeFullEqual", 8775, 0}, +{L"NotTildeTilde", 8777, 0}, +{L"NotVerticalBar", 8740, 0}, +{L"Nscr", 119977, 0}, +{L"Ntilde", 209, 0}, +{L"Nu", 925, 0}, +{L"OElig", 338, 0}, +{L"Oacute", 211, 0}, +{L"Ocirc", 212, 0}, +{L"Ocy", 1054, 0}, +{L"Odblac", 336, 0}, +{L"Ofr", 120082, 0}, +{L"Ograve", 210, 0}, +{L"Omacr", 332, 0}, +{L"Omega", 937, 0}, +{L"Omicron", 927, 0}, +{L"Oopf", 120134, 0}, +{L"OpenCurlyDoubleQuote", 8220, 0}, +{L"OpenCurlyQuote", 8216, 0}, +{L"Or", 10836, 0}, +{L"Oscr", 119978, 0}, +{L"Oslash", 216, 0}, +{L"Otilde", 213, 0}, +{L"Otimes", 10807, 0}, +{L"Ouml", 214, 0}, +{L"OverBar", 8254, 0}, +{L"OverBrace", 9182, 0}, +{L"OverBracket", 9140, 0}, +{L"OverParenthesis", 9180, 0}, +{L"PartialD", 8706, 0}, +{L"Pcy", 1055, 0}, +{L"Pfr", 120083, 0}, +{L"Phi", 934, 0}, +{L"Pi", 928, 0}, +{L"PlusMinus", 177, 0}, +{L"Poincareplane", 8460, 0}, +{L"Popf", 8473, 0}, +{L"Pr", 10939, 0}, +{L"Precedes", 8826, 0}, +{L"PrecedesEqual", 10927, 0}, +{L"PrecedesSlantEqual", 8828, 0}, +{L"PrecedesTilde", 8830, 0}, +{L"Prime", 8243, 0}, +{L"Product", 8719, 0}, +{L"Proportion", 8759, 0}, 
+{L"Proportional", 8733, 0}, +{L"Pscr", 119979, 0}, +{L"Psi", 936, 0}, +{L"QUOT", 34, 0}, +{L"Qfr", 120084, 0}, +{L"Qopf", 8474, 0}, +{L"Qscr", 119980, 0}, +{L"RBarr", 10512, 0}, +{L"REG", 174, 0}, +{L"Racute", 340, 0}, +{L"Rang", 10219, 0}, +{L"Rarr", 8608, 0}, +{L"Rarrtl", 10518, 0}, +{L"Rcaron", 344, 0}, +{L"Rcedil", 342, 0}, +{L"Rcy", 1056, 0}, +{L"Re", 8476, 0}, +{L"ReverseElement", 8715, 0}, +{L"ReverseEquilibrium", 8651, 0}, +{L"ReverseUpEquilibrium", 10607, 0}, +{L"Rfr", 8476, 0}, +{L"Rho", 929, 0}, +{L"RightAngleBracket", 10217, 0}, +{L"RightArrow", 8594, 0}, +{L"RightArrowBar", 8677, 0}, +{L"RightArrowLeftArrow", 8644, 0}, +{L"RightCeiling", 8969, 0}, +{L"RightDoubleBracket", 10215, 0}, +{L"RightDownTeeVector", 10589, 0}, +{L"RightDownVector", 8642, 0}, +{L"RightDownVectorBar", 10581, 0}, +{L"RightFloor", 8971, 0}, +{L"RightTee", 8866, 0}, +{L"RightTeeArrow", 8614, 0}, +{L"RightTeeVector", 10587, 0}, +{L"RightTriangle", 8883, 0}, +{L"RightTriangleBar", 10704, 0}, +{L"RightTriangleEqual", 8885, 0}, +{L"RightUpDownVector", 10575, 0}, +{L"RightUpTeeVector", 10588, 0}, +{L"RightUpVector", 8638, 0}, +{L"RightUpVectorBar", 10580, 0}, +{L"RightVector", 8640, 0}, +{L"RightVectorBar", 10579, 0}, +{L"Rightarrow", 8658, 0}, +{L"Ropf", 8477, 0}, +{L"RoundImplies", 10608, 0}, +{L"Rrightarrow", 8667, 0}, +{L"Rscr", 8475, 0}, +{L"Rsh", 8625, 0}, +{L"RuleDelayed", 10740, 0}, +{L"SHCHcy", 1065, 0}, +{L"SHcy", 1064, 0}, +{L"SOFTcy", 1068, 0}, +{L"Sacute", 346, 0}, +{L"Sc", 10940, 0}, +{L"Scaron", 352, 0}, +{L"Scedil", 350, 0}, +{L"Scirc", 348, 0}, +{L"Scy", 1057, 0}, +{L"Sfr", 120086, 0}, +{L"ShortDownArrow", 8595, 0}, +{L"ShortLeftArrow", 8592, 0}, +{L"ShortRightArrow", 8594, 0}, +{L"ShortUpArrow", 8593, 0}, +{L"Sigma", 931, 0}, +{L"SmallCircle", 8728, 0}, +{L"Sopf", 120138, 0}, +{L"Sqrt", 8730, 0}, +{L"Square", 9633, 0}, +{L"SquareIntersection", 8851, 0}, +{L"SquareSubset", 8847, 0}, +{L"SquareSubsetEqual", 8849, 0}, +{L"SquareSuperset", 8848, 0}, 
+{L"SquareSupersetEqual", 8850, 0}, +{L"SquareUnion", 8852, 0}, +{L"Sscr", 119982, 0}, +{L"Star", 8902, 0}, +{L"Sub", 8912, 0}, +{L"Subset", 8912, 0}, +{L"SubsetEqual", 8838, 0}, +{L"Succeeds", 8827, 0}, +{L"SucceedsEqual", 10928, 0}, +{L"SucceedsSlantEqual", 8829, 0}, +{L"SucceedsTilde", 8831, 0}, +{L"SuchThat", 8715, 0}, +{L"Sum", 8721, 0}, +{L"Sup", 8913, 0}, +{L"Superset", 8835, 0}, +{L"SupersetEqual", 8839, 0}, +{L"Supset", 8913, 0}, +{L"THORN", 222, 0}, +{L"TRADE", 8482, 0}, +{L"TSHcy", 1035, 0}, +{L"TScy", 1062, 0}, +{L"Tab", 9, 0}, +{L"Tau", 932, 0}, +{L"Tcaron", 356, 0}, +{L"Tcedil", 354, 0}, +{L"Tcy", 1058, 0}, +{L"Tfr", 120087, 0}, +{L"Therefore", 8756, 0}, +{L"Theta", 920, 0}, +{L"ThickSpace", 8287, 8202}, +{L"ThinSpace", 8201, 0}, +{L"Tilde", 8764, 0}, +{L"TildeEqual", 8771, 0}, +{L"TildeFullEqual", 8773, 0}, +{L"TildeTilde", 8776, 0}, +{L"Topf", 120139, 0}, +{L"TripleDot", 8411, 0}, +{L"Tscr", 119983, 0}, +{L"Tstrok", 358, 0}, +{L"Uacute", 218, 0}, +{L"Uarr", 8607, 0}, +{L"Uarrocir", 10569, 0}, +{L"Ubrcy", 1038, 0}, +{L"Ubreve", 364, 0}, +{L"Ucirc", 219, 0}, +{L"Ucy", 1059, 0}, +{L"Udblac", 368, 0}, +{L"Ufr", 120088, 0}, +{L"Ugrave", 217, 0}, +{L"Umacr", 362, 0}, +{L"UnderBar", 95, 0}, +{L"UnderBrace", 9183, 0}, +{L"UnderBracket", 9141, 0}, +{L"UnderParenthesis", 9181, 0}, +{L"Union", 8899, 0}, +{L"UnionPlus", 8846, 0}, +{L"Uogon", 370, 0}, +{L"Uopf", 120140, 0}, +{L"UpArrow", 8593, 0}, +{L"UpArrowBar", 10514, 0}, +{L"UpArrowDownArrow", 8645, 0}, +{L"UpDownArrow", 8597, 0}, +{L"UpEquilibrium", 10606, 0}, +{L"UpTee", 8869, 0}, +{L"UpTeeArrow", 8613, 0}, +{L"Uparrow", 8657, 0}, +{L"Updownarrow", 8661, 0}, +{L"UpperLeftArrow", 8598, 0}, +{L"UpperRightArrow", 8599, 0}, +{L"Upsi", 978, 0}, +{L"Upsilon", 933, 0}, +{L"Uring", 366, 0}, +{L"Uscr", 119984, 0}, +{L"Utilde", 360, 0}, +{L"Uuml", 220, 0}, +{L"VDash", 8875, 0}, +{L"Vbar", 10987, 0}, +{L"Vcy", 1042, 0}, +{L"Vdash", 8873, 0}, +{L"Vdashl", 10982, 0}, +{L"Vee", 8897, 0}, +{L"Verbar", 8214, 0}, 
+{L"Vert", 8214, 0}, +{L"VerticalBar", 8739, 0}, +{L"VerticalLine", 124, 0}, +{L"VerticalSeparator", 10072, 0}, +{L"VerticalTilde", 8768, 0}, +{L"VeryThinSpace", 8202, 0}, +{L"Vfr", 120089, 0}, +{L"Vopf", 120141, 0}, +{L"Vscr", 119985, 0}, +{L"Vvdash", 8874, 0}, +{L"Wcirc", 372, 0}, +{L"Wedge", 8896, 0}, +{L"Wfr", 120090, 0}, +{L"Wopf", 120142, 0}, +{L"Wscr", 119986, 0}, +{L"Xfr", 120091, 0}, +{L"Xi", 926, 0}, +{L"Xopf", 120143, 0}, +{L"Xscr", 119987, 0}, +{L"YAcy", 1071, 0}, +{L"YIcy", 1031, 0}, +{L"YUcy", 1070, 0}, +{L"Yacute", 221, 0}, +{L"Ycirc", 374, 0}, +{L"Ycy", 1067, 0}, +{L"Yfr", 120092, 0}, +{L"Yopf", 120144, 0}, +{L"Yscr", 119988, 0}, +{L"Yuml", 376, 0}, +{L"ZHcy", 1046, 0}, +{L"Zacute", 377, 0}, +{L"Zcaron", 381, 0}, +{L"Zcy", 1047, 0}, +{L"Zdot", 379, 0}, +{L"ZeroWidthSpace", 8203, 0}, +{L"Zeta", 918, 0}, +{L"Zfr", 8488, 0}, +{L"Zopf", 8484, 0}, +{L"Zscr", 119989, 0}, +{L"aacute", 225, 0}, +{L"abreve", 259, 0}, +{L"ac", 8766, 0}, +{L"acE", 8766, 819}, +{L"acd", 8767, 0}, +{L"acirc", 226, 0}, +{L"acute", 180, 0}, +{L"acy", 1072, 0}, +{L"aelig", 230, 0}, +{L"af", 8289, 0}, +{L"afr", 120094, 0}, +{L"agrave", 224, 0}, +{L"alefsym", 8501, 0}, +{L"aleph", 8501, 0}, +{L"alpha", 945, 0}, +{L"amacr", 257, 0}, +{L"amalg", 10815, 0}, +{L"amp", 38, 0}, +{L"and", 8743, 0}, +{L"andand", 10837, 0}, +{L"andd", 10844, 0}, +{L"andslope", 10840, 0}, +{L"andv", 10842, 0}, +{L"ang", 8736, 0}, +{L"ange", 10660, 0}, +{L"angle", 8736, 0}, +{L"angmsd", 8737, 0}, +{L"angmsdaa", 10664, 0}, +{L"angmsdab", 10665, 0}, +{L"angmsdac", 10666, 0}, +{L"angmsdad", 10667, 0}, +{L"angmsdae", 10668, 0}, +{L"angmsdaf", 10669, 0}, +{L"angmsdag", 10670, 0}, +{L"angmsdah", 10671, 0}, +{L"angrt", 8735, 0}, +{L"angrtvb", 8894, 0}, +{L"angrtvbd", 10653, 0}, +{L"angsph", 8738, 0}, +{L"angst", 197, 0}, +{L"angzarr", 9084, 0}, +{L"aogon", 261, 0}, +{L"aopf", 120146, 0}, +{L"ap", 8776, 0}, +{L"apE", 10864, 0}, +{L"apacir", 10863, 0}, +{L"ape", 8778, 0}, +{L"apid", 8779, 0}, +{L"apos", 39, 0}, 
+{L"approx", 8776, 0}, +{L"approxeq", 8778, 0}, +{L"aring", 229, 0}, +{L"ascr", 119990, 0}, +{L"ast", 42, 0}, +{L"asymp", 8776, 0}, +{L"asympeq", 8781, 0}, +{L"atilde", 227, 0}, +{L"auml", 228, 0}, +{L"awconint", 8755, 0}, +{L"awint", 10769, 0}, +{L"bNot", 10989, 0}, +{L"backcong", 8780, 0}, +{L"backepsilon", 1014, 0}, +{L"backprime", 8245, 0}, +{L"backsim", 8765, 0}, +{L"backsimeq", 8909, 0}, +{L"barvee", 8893, 0}, +{L"barwed", 8965, 0}, +{L"barwedge", 8965, 0}, +{L"bbrk", 9141, 0}, +{L"bbrktbrk", 9142, 0}, +{L"bcong", 8780, 0}, +{L"bcy", 1073, 0}, +{L"bdquo", 8222, 0}, +{L"becaus", 8757, 0}, +{L"because", 8757, 0}, +{L"bemptyv", 10672, 0}, +{L"bepsi", 1014, 0}, +{L"bernou", 8492, 0}, +{L"beta", 946, 0}, +{L"beth", 8502, 0}, +{L"between", 8812, 0}, +{L"bfr", 120095, 0}, +{L"bigcap", 8898, 0}, +{L"bigcirc", 9711, 0}, +{L"bigcup", 8899, 0}, +{L"bigodot", 10752, 0}, +{L"bigoplus", 10753, 0}, +{L"bigotimes", 10754, 0}, +{L"bigsqcup", 10758, 0}, +{L"bigstar", 9733, 0}, +{L"bigtriangledown", 9661, 0}, +{L"bigtriangleup", 9651, 0}, +{L"biguplus", 10756, 0}, +{L"bigvee", 8897, 0}, +{L"bigwedge", 8896, 0}, +{L"bkarow", 10509, 0}, +{L"blacklozenge", 10731, 0}, +{L"blacksquare", 9642, 0}, +{L"blacktriangle", 9652, 0}, +{L"blacktriangledown", 9662, 0}, +{L"blacktriangleleft", 9666, 0}, +{L"blacktriangleright", 9656, 0}, +{L"blank", 9251, 0}, +{L"blk12", 9618, 0}, +{L"blk14", 9617, 0}, +{L"blk34", 9619, 0}, +{L"block", 9608, 0}, +{L"bne", 61, 8421}, +{L"bnequiv", 8801, 8421}, +{L"bnot", 8976, 0}, +{L"bopf", 120147, 0}, +{L"bot", 8869, 0}, +{L"bottom", 8869, 0}, +{L"bowtie", 8904, 0}, +{L"boxDL", 9559, 0}, +{L"boxDR", 9556, 0}, +{L"boxDl", 9558, 0}, +{L"boxDr", 9555, 0}, +{L"boxH", 9552, 0}, +{L"boxHD", 9574, 0}, +{L"boxHU", 9577, 0}, +{L"boxHd", 9572, 0}, +{L"boxHu", 9575, 0}, +{L"boxUL", 9565, 0}, +{L"boxUR", 9562, 0}, +{L"boxUl", 9564, 0}, +{L"boxUr", 9561, 0}, +{L"boxV", 9553, 0}, +{L"boxVH", 9580, 0}, +{L"boxVL", 9571, 0}, +{L"boxVR", 9568, 0}, +{L"boxVh", 9579, 0}, 
+{L"boxVl", 9570, 0}, +{L"boxVr", 9567, 0}, +{L"boxbox", 10697, 0}, +{L"boxdL", 9557, 0}, +{L"boxdR", 9554, 0}, +{L"boxdl", 9488, 0}, +{L"boxdr", 9484, 0}, +{L"boxh", 9472, 0}, +{L"boxhD", 9573, 0}, +{L"boxhU", 9576, 0}, +{L"boxhd", 9516, 0}, +{L"boxhu", 9524, 0}, +{L"boxminus", 8863, 0}, +{L"boxplus", 8862, 0}, +{L"boxtimes", 8864, 0}, +{L"boxuL", 9563, 0}, +{L"boxuR", 9560, 0}, +{L"boxul", 9496, 0}, +{L"boxur", 9492, 0}, +{L"boxv", 9474, 0}, +{L"boxvH", 9578, 0}, +{L"boxvL", 9569, 0}, +{L"boxvR", 9566, 0}, +{L"boxvh", 9532, 0}, +{L"boxvl", 9508, 0}, +{L"boxvr", 9500, 0}, +{L"bprime", 8245, 0}, +{L"breve", 728, 0}, +{L"brvbar", 166, 0}, +{L"bscr", 119991, 0}, +{L"bsemi", 8271, 0}, +{L"bsim", 8765, 0}, +{L"bsime", 8909, 0}, +{L"bsol", 92, 0}, +{L"bsolb", 10693, 0}, +{L"bsolhsub", 10184, 0}, +{L"bull", 8226, 0}, +{L"bullet", 8226, 0}, +{L"bump", 8782, 0}, +{L"bumpE", 10926, 0}, +{L"bumpe", 8783, 0}, +{L"bumpeq", 8783, 0}, +{L"cacute", 263, 0}, +{L"cap", 8745, 0}, +{L"capand", 10820, 0}, +{L"capbrcup", 10825, 0}, +{L"capcap", 10827, 0}, +{L"capcup", 10823, 0}, +{L"capdot", 10816, 0}, +{L"caps", 8745, 65024}, +{L"caret", 8257, 0}, +{L"caron", 711, 0}, +{L"ccaps", 10829, 0}, +{L"ccaron", 269, 0}, +{L"ccedil", 231, 0}, +{L"ccirc", 265, 0}, +{L"ccups", 10828, 0}, +{L"ccupssm", 10832, 0}, +{L"cdot", 267, 0}, +{L"cedil", 184, 0}, +{L"cemptyv", 10674, 0}, +{L"cent", 162, 0}, +{L"centerdot", 183, 0}, +{L"cfr", 120096, 0}, +{L"chcy", 1095, 0}, +{L"check", 10003, 0}, +{L"checkmark", 10003, 0}, +{L"chi", 967, 0}, +{L"cir", 9675, 0}, +{L"cirE", 10691, 0}, +{L"circ", 710, 0}, +{L"circeq", 8791, 0}, +{L"circlearrowleft", 8634, 0}, +{L"circlearrowright", 8635, 0}, +{L"circledR", 174, 0}, +{L"circledS", 9416, 0}, +{L"circledast", 8859, 0}, +{L"circledcirc", 8858, 0}, +{L"circleddash", 8861, 0}, +{L"cire", 8791, 0}, +{L"cirfnint", 10768, 0}, +{L"cirmid", 10991, 0}, +{L"cirscir", 10690, 0}, +{L"clubs", 9827, 0}, +{L"clubsuit", 9827, 0}, +{L"colon", 58, 0}, +{L"colone", 8788, 0}, 
+{L"coloneq", 8788, 0}, +{L"comma", 44, 0}, +{L"commat", 64, 0}, +{L"comp", 8705, 0}, +{L"compfn", 8728, 0}, +{L"complement", 8705, 0}, +{L"complexes", 8450, 0}, +{L"cong", 8773, 0}, +{L"congdot", 10861, 0}, +{L"conint", 8750, 0}, +{L"copf", 120148, 0}, +{L"coprod", 8720, 0}, +{L"copy", 169, 0}, +{L"copysr", 8471, 0}, +{L"crarr", 8629, 0}, +{L"cross", 10007, 0}, +{L"cscr", 119992, 0}, +{L"csub", 10959, 0}, +{L"csube", 10961, 0}, +{L"csup", 10960, 0}, +{L"csupe", 10962, 0}, +{L"ctdot", 8943, 0}, +{L"cudarrl", 10552, 0}, +{L"cudarrr", 10549, 0}, +{L"cuepr", 8926, 0}, +{L"cuesc", 8927, 0}, +{L"cularr", 8630, 0}, +{L"cularrp", 10557, 0}, +{L"cup", 8746, 0}, +{L"cupbrcap", 10824, 0}, +{L"cupcap", 10822, 0}, +{L"cupcup", 10826, 0}, +{L"cupdot", 8845, 0}, +{L"cupor", 10821, 0}, +{L"cups", 8746, 65024}, +{L"curarr", 8631, 0}, +{L"curarrm", 10556, 0}, +{L"curlyeqprec", 8926, 0}, +{L"curlyeqsucc", 8927, 0}, +{L"curlyvee", 8910, 0}, +{L"curlywedge", 8911, 0}, +{L"curren", 164, 0}, +{L"curvearrowleft", 8630, 0}, +{L"curvearrowright", 8631, 0}, +{L"cuvee", 8910, 0}, +{L"cuwed", 8911, 0}, +{L"cwconint", 8754, 0}, +{L"cwint", 8753, 0}, +{L"cylcty", 9005, 0}, +{L"dArr", 8659, 0}, +{L"dHar", 10597, 0}, +{L"dagger", 8224, 0}, +{L"daleth", 8504, 0}, +{L"darr", 8595, 0}, +{L"dash", 8208, 0}, +{L"dashv", 8867, 0}, +{L"dbkarow", 10511, 0}, +{L"dblac", 733, 0}, +{L"dcaron", 271, 0}, +{L"dcy", 1076, 0}, +{L"dd", 8518, 0}, +{L"ddagger", 8225, 0}, +{L"ddarr", 8650, 0}, +{L"ddotseq", 10871, 0}, +{L"deg", 176, 0}, +{L"delta", 948, 0}, +{L"demptyv", 10673, 0}, +{L"dfisht", 10623, 0}, +{L"dfr", 120097, 0}, +{L"dharl", 8643, 0}, +{L"dharr", 8642, 0}, +{L"diam", 8900, 0}, +{L"diamond", 8900, 0}, +{L"diamondsuit", 9830, 0}, +{L"diams", 9830, 0}, +{L"die", 168, 0}, +{L"digamma", 989, 0}, +{L"disin", 8946, 0}, +{L"div", 247, 0}, +{L"divide", 247, 0}, +{L"divideontimes", 8903, 0}, +{L"divonx", 8903, 0}, +{L"djcy", 1106, 0}, +{L"dlcorn", 8990, 0}, +{L"dlcrop", 8973, 0}, +{L"dollar", 36, 0}, +{L"dopf", 
120149, 0}, +{L"dot", 729, 0}, +{L"doteq", 8784, 0}, +{L"doteqdot", 8785, 0}, +{L"dotminus", 8760, 0}, +{L"dotplus", 8724, 0}, +{L"dotsquare", 8865, 0}, +{L"doublebarwedge", 8966, 0}, +{L"downarrow", 8595, 0}, +{L"downdownarrows", 8650, 0}, +{L"downharpoonleft", 8643, 0}, +{L"downharpoonright", 8642, 0}, +{L"drbkarow", 10512, 0}, +{L"drcorn", 8991, 0}, +{L"drcrop", 8972, 0}, +{L"dscr", 119993, 0}, +{L"dscy", 1109, 0}, +{L"dsol", 10742, 0}, +{L"dstrok", 273, 0}, +{L"dtdot", 8945, 0}, +{L"dtri", 9663, 0}, +{L"dtrif", 9662, 0}, +{L"duarr", 8693, 0}, +{L"duhar", 10607, 0}, +{L"dwangle", 10662, 0}, +{L"dzcy", 1119, 0}, +{L"dzigrarr", 10239, 0}, +{L"eDDot", 10871, 0}, +{L"eDot", 8785, 0}, +{L"eacute", 233, 0}, +{L"easter", 10862, 0}, +{L"ecaron", 283, 0}, +{L"ecir", 8790, 0}, +{L"ecirc", 234, 0}, +{L"ecolon", 8789, 0}, +{L"ecy", 1101, 0}, +{L"edot", 279, 0}, +{L"ee", 8519, 0}, +{L"efDot", 8786, 0}, +{L"efr", 120098, 0}, +{L"eg", 10906, 0}, +{L"egrave", 232, 0}, +{L"egs", 10902, 0}, +{L"egsdot", 10904, 0}, +{L"el", 10905, 0}, +{L"elinters", 9191, 0}, +{L"ell", 8467, 0}, +{L"els", 10901, 0}, +{L"elsdot", 10903, 0}, +{L"emacr", 275, 0}, +{L"empty", 8709, 0}, +{L"emptyset", 8709, 0}, +{L"emptyv", 8709, 0}, +{L"emsp", 8195, 0}, +{L"emsp13", 8196, 0}, +{L"emsp14", 8197, 0}, +{L"eng", 331, 0}, +{L"ensp", 8194, 0}, +{L"eogon", 281, 0}, +{L"eopf", 120150, 0}, +{L"epar", 8917, 0}, +{L"eparsl", 10723, 0}, +{L"eplus", 10865, 0}, +{L"epsi", 949, 0}, +{L"epsilon", 949, 0}, +{L"epsiv", 1013, 0}, +{L"eqcirc", 8790, 0}, +{L"eqcolon", 8789, 0}, +{L"eqsim", 8770, 0}, +{L"eqslantgtr", 10902, 0}, +{L"eqslantless", 10901, 0}, +{L"equals", 61, 0}, +{L"equest", 8799, 0}, +{L"equiv", 8801, 0}, +{L"equivDD", 10872, 0}, +{L"eqvparsl", 10725, 0}, +{L"erDot", 8787, 0}, +{L"erarr", 10609, 0}, +{L"escr", 8495, 0}, +{L"esdot", 8784, 0}, +{L"esim", 8770, 0}, +{L"eta", 951, 0}, +{L"eth", 240, 0}, +{L"euml", 235, 0}, +{L"euro", 8364, 0}, +{L"excl", 33, 0}, +{L"exist", 8707, 0}, +{L"expectation", 8496, 0}, 
+{L"exponentiale", 8519, 0}, +{L"fallingdotseq", 8786, 0}, +{L"fcy", 1092, 0}, +{L"female", 9792, 0}, +{L"ffilig", 64259, 0}, +{L"fflig", 64256, 0}, +{L"ffllig", 64260, 0}, +{L"ffr", 120099, 0}, +{L"filig", 64257, 0}, +{L"fjlig", 102, 106}, +{L"flat", 9837, 0}, +{L"fllig", 64258, 0}, +{L"fltns", 9649, 0}, +{L"fnof", 402, 0}, +{L"fopf", 120151, 0}, +{L"forall", 8704, 0}, +{L"fork", 8916, 0}, +{L"forkv", 10969, 0}, +{L"fpartint", 10765, 0}, +{L"frac12", 189, 0}, +{L"frac13", 8531, 0}, +{L"frac14", 188, 0}, +{L"frac15", 8533, 0}, +{L"frac16", 8537, 0}, +{L"frac18", 8539, 0}, +{L"frac23", 8532, 0}, +{L"frac25", 8534, 0}, +{L"frac34", 190, 0}, +{L"frac35", 8535, 0}, +{L"frac38", 8540, 0}, +{L"frac45", 8536, 0}, +{L"frac56", 8538, 0}, +{L"frac58", 8541, 0}, +{L"frac78", 8542, 0}, +{L"frasl", 8260, 0}, +{L"frown", 8994, 0}, +{L"fscr", 119995, 0}, +{L"gE", 8807, 0}, +{L"gEl", 10892, 0}, +{L"gacute", 501, 0}, +{L"gamma", 947, 0}, +{L"gammad", 989, 0}, +{L"gap", 10886, 0}, +{L"gbreve", 287, 0}, +{L"gcirc", 285, 0}, +{L"gcy", 1075, 0}, +{L"gdot", 289, 0}, +{L"ge", 8805, 0}, +{L"gel", 8923, 0}, +{L"geq", 8805, 0}, +{L"geqq", 8807, 0}, +{L"geqslant", 10878, 0}, +{L"ges", 10878, 0}, +{L"gescc", 10921, 0}, +{L"gesdot", 10880, 0}, +{L"gesdoto", 10882, 0}, +{L"gesdotol", 10884, 0}, +{L"gesl", 8923, 65024}, +{L"gesles", 10900, 0}, +{L"gfr", 120100, 0}, +{L"gg", 8811, 0}, +{L"ggg", 8921, 0}, +{L"gimel", 8503, 0}, +{L"gjcy", 1107, 0}, +{L"gl", 8823, 0}, +{L"glE", 10898, 0}, +{L"gla", 10917, 0}, +{L"glj", 10916, 0}, +{L"gnE", 8809, 0}, +{L"gnap", 10890, 0}, +{L"gnapprox", 10890, 0}, +{L"gne", 10888, 0}, +{L"gneq", 10888, 0}, +{L"gneqq", 8809, 0}, +{L"gnsim", 8935, 0}, +{L"gopf", 120152, 0}, +{L"grave", 96, 0}, +{L"gscr", 8458, 0}, +{L"gsim", 8819, 0}, +{L"gsime", 10894, 0}, +{L"gsiml", 10896, 0}, +{L"gt", 62, 0}, +{L"gtcc", 10919, 0}, +{L"gtcir", 10874, 0}, +{L"gtdot", 8919, 0}, +{L"gtlPar", 10645, 0}, +{L"gtquest", 10876, 0}, +{L"gtrapprox", 10886, 0}, +{L"gtrarr", 10616, 0}, 
+{L"gtrdot", 8919, 0}, +{L"gtreqless", 8923, 0}, +{L"gtreqqless", 10892, 0}, +{L"gtrless", 8823, 0}, +{L"gtrsim", 8819, 0}, +{L"gvertneqq", 8809, 65024}, +{L"gvnE", 8809, 65024}, +{L"hArr", 8660, 0}, +{L"hairsp", 8202, 0}, +{L"half", 189, 0}, +{L"hamilt", 8459, 0}, +{L"hardcy", 1098, 0}, +{L"harr", 8596, 0}, +{L"harrcir", 10568, 0}, +{L"harrw", 8621, 0}, +{L"hbar", 8463, 0}, +{L"hcirc", 293, 0}, +{L"hearts", 9829, 0}, +{L"heartsuit", 9829, 0}, +{L"hellip", 8230, 0}, +{L"hercon", 8889, 0}, +{L"hfr", 120101, 0}, +{L"hksearow", 10533, 0}, +{L"hkswarow", 10534, 0}, +{L"hoarr", 8703, 0}, +{L"homtht", 8763, 0}, +{L"hookleftarrow", 8617, 0}, +{L"hookrightarrow", 8618, 0}, +{L"hopf", 120153, 0}, +{L"horbar", 8213, 0}, +{L"hscr", 119997, 0}, +{L"hslash", 8463, 0}, +{L"hstrok", 295, 0}, +{L"hybull", 8259, 0}, +{L"hyphen", 8208, 0}, +{L"iacute", 237, 0}, +{L"ic", 8291, 0}, +{L"icirc", 238, 0}, +{L"icy", 1080, 0}, +{L"iecy", 1077, 0}, +{L"iexcl", 161, 0}, +{L"iff", 8660, 0}, +{L"ifr", 120102, 0}, +{L"igrave", 236, 0}, +{L"ii", 8520, 0}, +{L"iiiint", 10764, 0}, +{L"iiint", 8749, 0}, +{L"iinfin", 10716, 0}, +{L"iiota", 8489, 0}, +{L"ijlig", 307, 0}, +{L"imacr", 299, 0}, +{L"image", 8465, 0}, +{L"imagline", 8464, 0}, +{L"imagpart", 8465, 0}, +{L"imath", 305, 0}, +{L"imof", 8887, 0}, +{L"imped", 437, 0}, +{L"in", 8712, 0}, +{L"incare", 8453, 0}, +{L"infin", 8734, 0}, +{L"infintie", 10717, 0}, +{L"inodot", 305, 0}, +{L"int", 8747, 0}, +{L"intcal", 8890, 0}, +{L"integers", 8484, 0}, +{L"intercal", 8890, 0}, +{L"intlarhk", 10775, 0}, +{L"intprod", 10812, 0}, +{L"iocy", 1105, 0}, +{L"iogon", 303, 0}, +{L"iopf", 120154, 0}, +{L"iota", 953, 0}, +{L"iprod", 10812, 0}, +{L"iquest", 191, 0}, +{L"iscr", 119998, 0}, +{L"isin", 8712, 0}, +{L"isinE", 8953, 0}, +{L"isindot", 8949, 0}, +{L"isins", 8948, 0}, +{L"isinsv", 8947, 0}, +{L"isinv", 8712, 0}, +{L"it", 8290, 0}, +{L"itilde", 297, 0}, +{L"iukcy", 1110, 0}, +{L"iuml", 239, 0}, +{L"jcirc", 309, 0}, +{L"jcy", 1081, 0}, +{L"jfr", 120103, 0}, 
+{L"jmath", 567, 0}, +{L"jopf", 120155, 0}, +{L"jscr", 119999, 0}, +{L"jsercy", 1112, 0}, +{L"jukcy", 1108, 0}, +{L"kappa", 954, 0}, +{L"kappav", 1008, 0}, +{L"kcedil", 311, 0}, +{L"kcy", 1082, 0}, +{L"kfr", 120104, 0}, +{L"kgreen", 312, 0}, +{L"khcy", 1093, 0}, +{L"kjcy", 1116, 0}, +{L"kopf", 120156, 0}, +{L"kscr", 120000, 0}, +{L"lAarr", 8666, 0}, +{L"lArr", 8656, 0}, +{L"lAtail", 10523, 0}, +{L"lBarr", 10510, 0}, +{L"lE", 8806, 0}, +{L"lEg", 10891, 0}, +{L"lHar", 10594, 0}, +{L"lacute", 314, 0}, +{L"laemptyv", 10676, 0}, +{L"lagran", 8466, 0}, +{L"lambda", 955, 0}, +{L"lang", 10216, 0}, +{L"langd", 10641, 0}, +{L"langle", 10216, 0}, +{L"lap", 10885, 0}, +{L"laquo", 171, 0}, +{L"larr", 8592, 0}, +{L"larrb", 8676, 0}, +{L"larrbfs", 10527, 0}, +{L"larrfs", 10525, 0}, +{L"larrhk", 8617, 0}, +{L"larrlp", 8619, 0}, +{L"larrpl", 10553, 0}, +{L"larrsim", 10611, 0}, +{L"larrtl", 8610, 0}, +{L"lat", 10923, 0}, +{L"latail", 10521, 0}, +{L"late", 10925, 0}, +{L"lates", 10925, 65024}, +{L"lbarr", 10508, 0}, +{L"lbbrk", 10098, 0}, +{L"lbrace", 123, 0}, +{L"lbrack", 91, 0}, +{L"lbrke", 10635, 0}, +{L"lbrksld", 10639, 0}, +{L"lbrkslu", 10637, 0}, +{L"lcaron", 318, 0}, +{L"lcedil", 316, 0}, +{L"lceil", 8968, 0}, +{L"lcub", 123, 0}, +{L"lcy", 1083, 0}, +{L"ldca", 10550, 0}, +{L"ldquo", 8220, 0}, +{L"ldquor", 8222, 0}, +{L"ldrdhar", 10599, 0}, +{L"ldrushar", 10571, 0}, +{L"ldsh", 8626, 0}, +{L"le", 8804, 0}, +{L"leftarrow", 8592, 0}, +{L"leftarrowtail", 8610, 0}, +{L"leftharpoondown", 8637, 0}, +{L"leftharpoonup", 8636, 0}, +{L"leftleftarrows", 8647, 0}, +{L"leftrightarrow", 8596, 0}, +{L"leftrightarrows", 8646, 0}, +{L"leftrightharpoons", 8651, 0}, +{L"leftrightsquigarrow", 8621, 0}, +{L"leftthreetimes", 8907, 0}, +{L"leg", 8922, 0}, +{L"leq", 8804, 0}, +{L"leqq", 8806, 0}, +{L"leqslant", 10877, 0}, +{L"les", 10877, 0}, +{L"lescc", 10920, 0}, +{L"lesdot", 10879, 0}, +{L"lesdoto", 10881, 0}, +{L"lesdotor", 10883, 0}, +{L"lesg", 8922, 65024}, +{L"lesges", 10899, 0}, 
+{L"lessapprox", 10885, 0}, +{L"lessdot", 8918, 0}, +{L"lesseqgtr", 8922, 0}, +{L"lesseqqgtr", 10891, 0}, +{L"lessgtr", 8822, 0}, +{L"lesssim", 8818, 0}, +{L"lfisht", 10620, 0}, +{L"lfloor", 8970, 0}, +{L"lfr", 120105, 0}, +{L"lg", 8822, 0}, +{L"lgE", 10897, 0}, +{L"lhard", 8637, 0}, +{L"lharu", 8636, 0}, +{L"lharul", 10602, 0}, +{L"lhblk", 9604, 0}, +{L"ljcy", 1113, 0}, +{L"ll", 8810, 0}, +{L"llarr", 8647, 0}, +{L"llcorner", 8990, 0}, +{L"llhard", 10603, 0}, +{L"lltri", 9722, 0}, +{L"lmidot", 320, 0}, +{L"lmoust", 9136, 0}, +{L"lmoustache", 9136, 0}, +{L"lnE", 8808, 0}, +{L"lnap", 10889, 0}, +{L"lnapprox", 10889, 0}, +{L"lne", 10887, 0}, +{L"lneq", 10887, 0}, +{L"lneqq", 8808, 0}, +{L"lnsim", 8934, 0}, +{L"loang", 10220, 0}, +{L"loarr", 8701, 0}, +{L"lobrk", 10214, 0}, +{L"longleftarrow", 10229, 0}, +{L"longleftrightarrow", 10231, 0}, +{L"longmapsto", 10236, 0}, +{L"longrightarrow", 10230, 0}, +{L"looparrowleft", 8619, 0}, +{L"looparrowright", 8620, 0}, +{L"lopar", 10629, 0}, +{L"lopf", 120157, 0}, +{L"loplus", 10797, 0}, +{L"lotimes", 10804, 0}, +{L"lowast", 8727, 0}, +{L"lowbar", 95, 0}, +{L"loz", 9674, 0}, +{L"lozenge", 9674, 0}, +{L"lozf", 10731, 0}, +{L"lpar", 40, 0}, +{L"lparlt", 10643, 0}, +{L"lrarr", 8646, 0}, +{L"lrcorner", 8991, 0}, +{L"lrhar", 8651, 0}, +{L"lrhard", 10605, 0}, +{L"lrm", 8206, 0}, +{L"lrtri", 8895, 0}, +{L"lsaquo", 8249, 0}, +{L"lscr", 120001, 0}, +{L"lsh", 8624, 0}, +{L"lsim", 8818, 0}, +{L"lsime", 10893, 0}, +{L"lsimg", 10895, 0}, +{L"lsqb", 91, 0}, +{L"lsquo", 8216, 0}, +{L"lsquor", 8218, 0}, +{L"lstrok", 322, 0}, +{L"lt", 60, 0}, +{L"ltcc", 10918, 0}, +{L"ltcir", 10873, 0}, +{L"ltdot", 8918, 0}, +{L"lthree", 8907, 0}, +{L"ltimes", 8905, 0}, +{L"ltlarr", 10614, 0}, +{L"ltquest", 10875, 0}, +{L"ltrPar", 10646, 0}, +{L"ltri", 9667, 0}, +{L"ltrie", 8884, 0}, +{L"ltrif", 9666, 0}, +{L"lurdshar", 10570, 0}, +{L"luruhar", 10598, 0}, +{L"lvertneqq", 8808, 65024}, +{L"lvnE", 8808, 65024}, +{L"mDDot", 8762, 0}, +{L"macr", 175, 0}, +{L"male", 
9794, 0}, +{L"malt", 10016, 0}, +{L"maltese", 10016, 0}, +{L"map", 8614, 0}, +{L"mapsto", 8614, 0}, +{L"mapstodown", 8615, 0}, +{L"mapstoleft", 8612, 0}, +{L"mapstoup", 8613, 0}, +{L"marker", 9646, 0}, +{L"mcomma", 10793, 0}, +{L"mcy", 1084, 0}, +{L"mdash", 8212, 0}, +{L"measuredangle", 8737, 0}, +{L"mfr", 120106, 0}, +{L"mho", 8487, 0}, +{L"micro", 181, 0}, +{L"mid", 8739, 0}, +{L"midast", 42, 0}, +{L"midcir", 10992, 0}, +{L"middot", 183, 0}, +{L"minus", 8722, 0}, +{L"minusb", 8863, 0}, +{L"minusd", 8760, 0}, +{L"minusdu", 10794, 0}, +{L"mlcp", 10971, 0}, +{L"mldr", 8230, 0}, +{L"mnplus", 8723, 0}, +{L"models", 8871, 0}, +{L"mopf", 120158, 0}, +{L"mp", 8723, 0}, +{L"mscr", 120002, 0}, +{L"mstpos", 8766, 0}, +{L"mu", 956, 0}, +{L"multimap", 8888, 0}, +{L"mumap", 8888, 0}, +{L"nGg", 8921, 824}, +{L"nGt", 8811, 8402}, +{L"nGtv", 8811, 824}, +{L"nLeftarrow", 8653, 0}, +{L"nLeftrightarrow", 8654, 0}, +{L"nLl", 8920, 824}, +{L"nLt", 8810, 8402}, +{L"nLtv", 8810, 824}, +{L"nRightarrow", 8655, 0}, +{L"nVDash", 8879, 0}, +{L"nVdash", 8878, 0}, +{L"nabla", 8711, 0}, +{L"nacute", 324, 0}, +{L"nang", 8736, 8402}, +{L"nap", 8777, 0}, +{L"napE", 10864, 824}, +{L"napid", 8779, 824}, +{L"napos", 329, 0}, +{L"napprox", 8777, 0}, +{L"natur", 9838, 0}, +{L"natural", 9838, 0}, +{L"naturals", 8469, 0}, +{L"nbsp", 160, 0}, +{L"nbump", 8782, 824}, +{L"nbumpe", 8783, 824}, +{L"ncap", 10819, 0}, +{L"ncaron", 328, 0}, +{L"ncedil", 326, 0}, +{L"ncong", 8775, 0}, +{L"ncongdot", 10861, 824}, +{L"ncup", 10818, 0}, +{L"ncy", 1085, 0}, +{L"ndash", 8211, 0}, +{L"ne", 8800, 0}, +{L"neArr", 8663, 0}, +{L"nearhk", 10532, 0}, +{L"nearr", 8599, 0}, +{L"nearrow", 8599, 0}, +{L"nedot", 8784, 824}, +{L"nequiv", 8802, 0}, +{L"nesear", 10536, 0}, +{L"nesim", 8770, 824}, +{L"nexist", 8708, 0}, +{L"nexists", 8708, 0}, +{L"nfr", 120107, 0}, +{L"ngE", 8807, 824}, +{L"nge", 8817, 0}, +{L"ngeq", 8817, 0}, +{L"ngeqq", 8807, 824}, +{L"ngeqslant", 10878, 824}, +{L"nges", 10878, 824}, +{L"ngsim", 8821, 0}, +{L"ngt", 
8815, 0}, +{L"ngtr", 8815, 0}, +{L"nhArr", 8654, 0}, +{L"nharr", 8622, 0}, +{L"nhpar", 10994, 0}, +{L"ni", 8715, 0}, +{L"nis", 8956, 0}, +{L"nisd", 8954, 0}, +{L"niv", 8715, 0}, +{L"njcy", 1114, 0}, +{L"nlArr", 8653, 0}, +{L"nlE", 8806, 824}, +{L"nlarr", 8602, 0}, +{L"nldr", 8229, 0}, +{L"nle", 8816, 0}, +{L"nleftarrow", 8602, 0}, +{L"nleftrightarrow", 8622, 0}, +{L"nleq", 8816, 0}, +{L"nleqq", 8806, 824}, +{L"nleqslant", 10877, 824}, +{L"nles", 10877, 824}, +{L"nless", 8814, 0}, +{L"nlsim", 8820, 0}, +{L"nlt", 8814, 0}, +{L"nltri", 8938, 0}, +{L"nltrie", 8940, 0}, +{L"nmid", 8740, 0}, +{L"nopf", 120159, 0}, +{L"not", 172, 0}, +{L"notin", 8713, 0}, +{L"notinE", 8953, 824}, +{L"notindot", 8949, 824}, +{L"notinva", 8713, 0}, +{L"notinvb", 8951, 0}, +{L"notinvc", 8950, 0}, +{L"notni", 8716, 0}, +{L"notniva", 8716, 0}, +{L"notnivb", 8958, 0}, +{L"notnivc", 8957, 0}, +{L"npar", 8742, 0}, +{L"nparallel", 8742, 0}, +{L"nparsl", 11005, 8421}, +{L"npart", 8706, 824}, +{L"npolint", 10772, 0}, +{L"npr", 8832, 0}, +{L"nprcue", 8928, 0}, +{L"npre", 10927, 824}, +{L"nprec", 8832, 0}, +{L"npreceq", 10927, 824}, +{L"nrArr", 8655, 0}, +{L"nrarr", 8603, 0}, +{L"nrarrc", 10547, 824}, +{L"nrarrw", 8605, 824}, +{L"nrightarrow", 8603, 0}, +{L"nrtri", 8939, 0}, +{L"nrtrie", 8941, 0}, +{L"nsc", 8833, 0}, +{L"nsccue", 8929, 0}, +{L"nsce", 10928, 824}, +{L"nscr", 120003, 0}, +{L"nshortmid", 8740, 0}, +{L"nshortparallel", 8742, 0}, +{L"nsim", 8769, 0}, +{L"nsime", 8772, 0}, +{L"nsimeq", 8772, 0}, +{L"nsmid", 8740, 0}, +{L"nspar", 8742, 0}, +{L"nsqsube", 8930, 0}, +{L"nsqsupe", 8931, 0}, +{L"nsub", 8836, 0}, +{L"nsubE", 10949, 824}, +{L"nsube", 8840, 0}, +{L"nsubset", 8834, 8402}, +{L"nsubseteq", 8840, 0}, +{L"nsubseteqq", 10949, 824}, +{L"nsucc", 8833, 0}, +{L"nsucceq", 10928, 824}, +{L"nsup", 8837, 0}, +{L"nsupE", 10950, 824}, +{L"nsupe", 8841, 0}, +{L"nsupset", 8835, 8402}, +{L"nsupseteq", 8841, 0}, +{L"nsupseteqq", 10950, 824}, +{L"ntgl", 8825, 0}, +{L"ntilde", 241, 0}, +{L"ntlg", 8824, 
0}, +{L"ntriangleleft", 8938, 0}, +{L"ntrianglelefteq", 8940, 0}, +{L"ntriangleright", 8939, 0}, +{L"ntrianglerighteq", 8941, 0}, +{L"nu", 957, 0}, +{L"num", 35, 0}, +{L"numero", 8470, 0}, +{L"numsp", 8199, 0}, +{L"nvDash", 8877, 0}, +{L"nvHarr", 10500, 0}, +{L"nvap", 8781, 8402}, +{L"nvdash", 8876, 0}, +{L"nvge", 8805, 8402}, +{L"nvgt", 62, 8402}, +{L"nvinfin", 10718, 0}, +{L"nvlArr", 10498, 0}, +{L"nvle", 8804, 8402}, +{L"nvlt", 60, 8402}, +{L"nvltrie", 8884, 8402}, +{L"nvrArr", 10499, 0}, +{L"nvrtrie", 8885, 8402}, +{L"nvsim", 8764, 8402}, +{L"nwArr", 8662, 0}, +{L"nwarhk", 10531, 0}, +{L"nwarr", 8598, 0}, +{L"nwarrow", 8598, 0}, +{L"nwnear", 10535, 0}, +{L"oS", 9416, 0}, +{L"oacute", 243, 0}, +{L"oast", 8859, 0}, +{L"ocir", 8858, 0}, +{L"ocirc", 244, 0}, +{L"ocy", 1086, 0}, +{L"odash", 8861, 0}, +{L"odblac", 337, 0}, +{L"odiv", 10808, 0}, +{L"odot", 8857, 0}, +{L"odsold", 10684, 0}, +{L"oelig", 339, 0}, +{L"ofcir", 10687, 0}, +{L"ofr", 120108, 0}, +{L"ogon", 731, 0}, +{L"ograve", 242, 0}, +{L"ogt", 10689, 0}, +{L"ohbar", 10677, 0}, +{L"ohm", 937, 0}, +{L"oint", 8750, 0}, +{L"olarr", 8634, 0}, +{L"olcir", 10686, 0}, +{L"olcross", 10683, 0}, +{L"oline", 8254, 0}, +{L"olt", 10688, 0}, +{L"omacr", 333, 0}, +{L"omega", 969, 0}, +{L"omicron", 959, 0}, +{L"omid", 10678, 0}, +{L"ominus", 8854, 0}, +{L"oopf", 120160, 0}, +{L"opar", 10679, 0}, +{L"operp", 10681, 0}, +{L"oplus", 8853, 0}, +{L"or", 8744, 0}, +{L"orarr", 8635, 0}, +{L"ord", 10845, 0}, +{L"order", 8500, 0}, +{L"orderof", 8500, 0}, +{L"ordf", 170, 0}, +{L"ordm", 186, 0}, +{L"origof", 8886, 0}, +{L"oror", 10838, 0}, +{L"orslope", 10839, 0}, +{L"orv", 10843, 0}, +{L"oscr", 8500, 0}, +{L"oslash", 248, 0}, +{L"osol", 8856, 0}, +{L"otilde", 245, 0}, +{L"otimes", 8855, 0}, +{L"otimesas", 10806, 0}, +{L"ouml", 246, 0}, +{L"ovbar", 9021, 0}, +{L"par", 8741, 0}, +{L"para", 182, 0}, +{L"parallel", 8741, 0}, +{L"parsim", 10995, 0}, +{L"parsl", 11005, 0}, +{L"part", 8706, 0}, +{L"pcy", 1087, 0}, +{L"percnt", 37, 0}, 
+{L"period", 46, 0}, +{L"permil", 8240, 0}, +{L"perp", 8869, 0}, +{L"pertenk", 8241, 0}, +{L"pfr", 120109, 0}, +{L"phi", 966, 0}, +{L"phiv", 981, 0}, +{L"phmmat", 8499, 0}, +{L"phone", 9742, 0}, +{L"pi", 960, 0}, +{L"pitchfork", 8916, 0}, +{L"piv", 982, 0}, +{L"planck", 8463, 0}, +{L"planckh", 8462, 0}, +{L"plankv", 8463, 0}, +{L"plus", 43, 0}, +{L"plusacir", 10787, 0}, +{L"plusb", 8862, 0}, +{L"pluscir", 10786, 0}, +{L"plusdo", 8724, 0}, +{L"plusdu", 10789, 0}, +{L"pluse", 10866, 0}, +{L"plusmn", 177, 0}, +{L"plussim", 10790, 0}, +{L"plustwo", 10791, 0}, +{L"pm", 177, 0}, +{L"pointint", 10773, 0}, +{L"popf", 120161, 0}, +{L"pound", 163, 0}, +{L"pr", 8826, 0}, +{L"prE", 10931, 0}, +{L"prap", 10935, 0}, +{L"prcue", 8828, 0}, +{L"pre", 10927, 0}, +{L"prec", 8826, 0}, +{L"precapprox", 10935, 0}, +{L"preccurlyeq", 8828, 0}, +{L"preceq", 10927, 0}, +{L"precnapprox", 10937, 0}, +{L"precneqq", 10933, 0}, +{L"precnsim", 8936, 0}, +{L"precsim", 8830, 0}, +{L"prime", 8242, 0}, +{L"primes", 8473, 0}, +{L"prnE", 10933, 0}, +{L"prnap", 10937, 0}, +{L"prnsim", 8936, 0}, +{L"prod", 8719, 0}, +{L"profalar", 9006, 0}, +{L"profline", 8978, 0}, +{L"profsurf", 8979, 0}, +{L"prop", 8733, 0}, +{L"propto", 8733, 0}, +{L"prsim", 8830, 0}, +{L"prurel", 8880, 0}, +{L"pscr", 120005, 0}, +{L"psi", 968, 0}, +{L"puncsp", 8200, 0}, +{L"qfr", 120110, 0}, +{L"qint", 10764, 0}, +{L"qopf", 120162, 0}, +{L"qprime", 8279, 0}, +{L"qscr", 120006, 0}, +{L"quaternions", 8461, 0}, +{L"quatint", 10774, 0}, +{L"quest", 63, 0}, +{L"questeq", 8799, 0}, +{L"quot", 34, 0}, +{L"rAarr", 8667, 0}, +{L"rArr", 8658, 0}, +{L"rAtail", 10524, 0}, +{L"rBarr", 10511, 0}, +{L"rHar", 10596, 0}, +{L"race", 8765, 817}, +{L"racute", 341, 0}, +{L"radic", 8730, 0}, +{L"raemptyv", 10675, 0}, +{L"rang", 10217, 0}, +{L"rangd", 10642, 0}, +{L"range", 10661, 0}, +{L"rangle", 10217, 0}, +{L"raquo", 187, 0}, +{L"rarr", 8594, 0}, +{L"rarrap", 10613, 0}, +{L"rarrb", 8677, 0}, +{L"rarrbfs", 10528, 0}, +{L"rarrc", 10547, 0}, +{L"rarrfs", 
10526, 0}, +{L"rarrhk", 8618, 0}, +{L"rarrlp", 8620, 0}, +{L"rarrpl", 10565, 0}, +{L"rarrsim", 10612, 0}, +{L"rarrtl", 8611, 0}, +{L"rarrw", 8605, 0}, +{L"ratail", 10522, 0}, +{L"ratio", 8758, 0}, +{L"rationals", 8474, 0}, +{L"rbarr", 10509, 0}, +{L"rbbrk", 10099, 0}, +{L"rbrace", 125, 0}, +{L"rbrack", 93, 0}, +{L"rbrke", 10636, 0}, +{L"rbrksld", 10638, 0}, +{L"rbrkslu", 10640, 0}, +{L"rcaron", 345, 0}, +{L"rcedil", 343, 0}, +{L"rceil", 8969, 0}, +{L"rcub", 125, 0}, +{L"rcy", 1088, 0}, +{L"rdca", 10551, 0}, +{L"rdldhar", 10601, 0}, +{L"rdquo", 8221, 0}, +{L"rdquor", 8221, 0}, +{L"rdsh", 8627, 0}, +{L"real", 8476, 0}, +{L"realine", 8475, 0}, +{L"realpart", 8476, 0}, +{L"reals", 8477, 0}, +{L"rect", 9645, 0}, +{L"reg", 174, 0}, +{L"rfisht", 10621, 0}, +{L"rfloor", 8971, 0}, +{L"rfr", 120111, 0}, +{L"rhard", 8641, 0}, +{L"rharu", 8640, 0}, +{L"rharul", 10604, 0}, +{L"rho", 961, 0}, +{L"rhov", 1009, 0}, +{L"rightarrow", 8594, 0}, +{L"rightarrowtail", 8611, 0}, +{L"rightharpoondown", 8641, 0}, +{L"rightharpoonup", 8640, 0}, +{L"rightleftarrows", 8644, 0}, +{L"rightleftharpoons", 8652, 0}, +{L"rightrightarrows", 8649, 0}, +{L"rightsquigarrow", 8605, 0}, +{L"rightthreetimes", 8908, 0}, +{L"ring", 730, 0}, +{L"risingdotseq", 8787, 0}, +{L"rlarr", 8644, 0}, +{L"rlhar", 8652, 0}, +{L"rlm", 8207, 0}, +{L"rmoust", 9137, 0}, +{L"rmoustache", 9137, 0}, +{L"rnmid", 10990, 0}, +{L"roang", 10221, 0}, +{L"roarr", 8702, 0}, +{L"robrk", 10215, 0}, +{L"ropar", 10630, 0}, +{L"ropf", 120163, 0}, +{L"roplus", 10798, 0}, +{L"rotimes", 10805, 0}, +{L"rpar", 41, 0}, +{L"rpargt", 10644, 0}, +{L"rppolint", 10770, 0}, +{L"rrarr", 8649, 0}, +{L"rsaquo", 8250, 0}, +{L"rscr", 120007, 0}, +{L"rsh", 8625, 0}, +{L"rsqb", 93, 0}, +{L"rsquo", 8217, 0}, +{L"rsquor", 8217, 0}, +{L"rthree", 8908, 0}, +{L"rtimes", 8906, 0}, +{L"rtri", 9657, 0}, +{L"rtrie", 8885, 0}, +{L"rtrif", 9656, 0}, +{L"rtriltri", 10702, 0}, +{L"ruluhar", 10600, 0}, +{L"rx", 8478, 0}, +{L"sacute", 347, 0}, +{L"sbquo", 8218, 0}, 
+{L"sc", 8827, 0}, +{L"scE", 10932, 0}, +{L"scap", 10936, 0}, +{L"scaron", 353, 0}, +{L"sccue", 8829, 0}, +{L"sce", 10928, 0}, +{L"scedil", 351, 0}, +{L"scirc", 349, 0}, +{L"scnE", 10934, 0}, +{L"scnap", 10938, 0}, +{L"scnsim", 8937, 0}, +{L"scpolint", 10771, 0}, +{L"scsim", 8831, 0}, +{L"scy", 1089, 0}, +{L"sdot", 8901, 0}, +{L"sdotb", 8865, 0}, +{L"sdote", 10854, 0}, +{L"seArr", 8664, 0}, +{L"searhk", 10533, 0}, +{L"searr", 8600, 0}, +{L"searrow", 8600, 0}, +{L"sect", 167, 0}, +{L"semi", 59, 0}, +{L"seswar", 10537, 0}, +{L"setminus", 8726, 0}, +{L"setmn", 8726, 0}, +{L"sext", 10038, 0}, +{L"sfr", 120112, 0}, +{L"sfrown", 8994, 0}, +{L"sharp", 9839, 0}, +{L"shchcy", 1097, 0}, +{L"shcy", 1096, 0}, +{L"shortmid", 8739, 0}, +{L"shortparallel", 8741, 0}, +{L"shy", 173, 0}, +{L"sigma", 963, 0}, +{L"sigmaf", 962, 0}, +{L"sigmav", 962, 0}, +{L"sim", 8764, 0}, +{L"simdot", 10858, 0}, +{L"sime", 8771, 0}, +{L"simeq", 8771, 0}, +{L"simg", 10910, 0}, +{L"simgE", 10912, 0}, +{L"siml", 10909, 0}, +{L"simlE", 10911, 0}, +{L"simne", 8774, 0}, +{L"simplus", 10788, 0}, +{L"simrarr", 10610, 0}, +{L"slarr", 8592, 0}, +{L"smallsetminus", 8726, 0}, +{L"smashp", 10803, 0}, +{L"smeparsl", 10724, 0}, +{L"smid", 8739, 0}, +{L"smile", 8995, 0}, +{L"smt", 10922, 0}, +{L"smte", 10924, 0}, +{L"smtes", 10924, 65024}, +{L"softcy", 1100, 0}, +{L"sol", 47, 0}, +{L"solb", 10692, 0}, +{L"solbar", 9023, 0}, +{L"sopf", 120164, 0}, +{L"spades", 9824, 0}, +{L"spadesuit", 9824, 0}, +{L"spar", 8741, 0}, +{L"sqcap", 8851, 0}, +{L"sqcaps", 8851, 65024}, +{L"sqcup", 8852, 0}, +{L"sqcups", 8852, 65024}, +{L"sqsub", 8847, 0}, +{L"sqsube", 8849, 0}, +{L"sqsubset", 8847, 0}, +{L"sqsubseteq", 8849, 0}, +{L"sqsup", 8848, 0}, +{L"sqsupe", 8850, 0}, +{L"sqsupset", 8848, 0}, +{L"sqsupseteq", 8850, 0}, +{L"squ", 9633, 0}, +{L"square", 9633, 0}, +{L"squarf", 9642, 0}, +{L"squf", 9642, 0}, +{L"srarr", 8594, 0}, +{L"sscr", 120008, 0}, +{L"ssetmn", 8726, 0}, +{L"ssmile", 8995, 0}, +{L"sstarf", 8902, 0}, +{L"star", 9734, 
0}, +{L"starf", 9733, 0}, +{L"straightepsilon", 1013, 0}, +{L"straightphi", 981, 0}, +{L"strns", 175, 0}, +{L"sub", 8834, 0}, +{L"subE", 10949, 0}, +{L"subdot", 10941, 0}, +{L"sube", 8838, 0}, +{L"subedot", 10947, 0}, +{L"submult", 10945, 0}, +{L"subnE", 10955, 0}, +{L"subne", 8842, 0}, +{L"subplus", 10943, 0}, +{L"subrarr", 10617, 0}, +{L"subset", 8834, 0}, +{L"subseteq", 8838, 0}, +{L"subseteqq", 10949, 0}, +{L"subsetneq", 8842, 0}, +{L"subsetneqq", 10955, 0}, +{L"subsim", 10951, 0}, +{L"subsub", 10965, 0}, +{L"subsup", 10963, 0}, +{L"succ", 8827, 0}, +{L"succapprox", 10936, 0}, +{L"succcurlyeq", 8829, 0}, +{L"succeq", 10928, 0}, +{L"succnapprox", 10938, 0}, +{L"succneqq", 10934, 0}, +{L"succnsim", 8937, 0}, +{L"succsim", 8831, 0}, +{L"sum", 8721, 0}, +{L"sung", 9834, 0}, +{L"sup", 8835, 0}, +{L"sup1", 185, 0}, +{L"sup2", 178, 0}, +{L"sup3", 179, 0}, +{L"supE", 10950, 0}, +{L"supdot", 10942, 0}, +{L"supdsub", 10968, 0}, +{L"supe", 8839, 0}, +{L"supedot", 10948, 0}, +{L"suphsol", 10185, 0}, +{L"suphsub", 10967, 0}, +{L"suplarr", 10619, 0}, +{L"supmult", 10946, 0}, +{L"supnE", 10956, 0}, +{L"supne", 8843, 0}, +{L"supplus", 10944, 0}, +{L"supset", 8835, 0}, +{L"supseteq", 8839, 0}, +{L"supseteqq", 10950, 0}, +{L"supsetneq", 8843, 0}, +{L"supsetneqq", 10956, 0}, +{L"supsim", 10952, 0}, +{L"supsub", 10964, 0}, +{L"supsup", 10966, 0}, +{L"swArr", 8665, 0}, +{L"swarhk", 10534, 0}, +{L"swarr", 8601, 0}, +{L"swarrow", 8601, 0}, +{L"swnwar", 10538, 0}, +{L"szlig", 223, 0}, +{L"target", 8982, 0}, +{L"tau", 964, 0}, +{L"tbrk", 9140, 0}, +{L"tcaron", 357, 0}, +{L"tcedil", 355, 0}, +{L"tcy", 1090, 0}, +{L"tdot", 8411, 0}, +{L"telrec", 8981, 0}, +{L"tfr", 120113, 0}, +{L"there4", 8756, 0}, +{L"therefore", 8756, 0}, +{L"theta", 952, 0}, +{L"thetasym", 977, 0}, +{L"thetav", 977, 0}, +{L"thickapprox", 8776, 0}, +{L"thicksim", 8764, 0}, +{L"thinsp", 8201, 0}, +{L"thkap", 8776, 0}, +{L"thksim", 8764, 0}, +{L"thorn", 254, 0}, +{L"tilde", 732, 0}, +{L"times", 215, 0}, +{L"timesb", 
8864, 0}, +{L"timesbar", 10801, 0}, +{L"timesd", 10800, 0}, +{L"tint", 8749, 0}, +{L"toea", 10536, 0}, +{L"top", 8868, 0}, +{L"topbot", 9014, 0}, +{L"topcir", 10993, 0}, +{L"topf", 120165, 0}, +{L"topfork", 10970, 0}, +{L"tosa", 10537, 0}, +{L"tprime", 8244, 0}, +{L"trade", 8482, 0}, +{L"triangle", 9653, 0}, +{L"triangledown", 9663, 0}, +{L"triangleleft", 9667, 0}, +{L"trianglelefteq", 8884, 0}, +{L"triangleq", 8796, 0}, +{L"triangleright", 9657, 0}, +{L"trianglerighteq", 8885, 0}, +{L"tridot", 9708, 0}, +{L"trie", 8796, 0}, +{L"triminus", 10810, 0}, +{L"triplus", 10809, 0}, +{L"trisb", 10701, 0}, +{L"tritime", 10811, 0}, +{L"trpezium", 9186, 0}, +{L"tscr", 120009, 0}, +{L"tscy", 1094, 0}, +{L"tshcy", 1115, 0}, +{L"tstrok", 359, 0}, +{L"twixt", 8812, 0}, +{L"twoheadleftarrow", 8606, 0}, +{L"twoheadrightarrow", 8608, 0}, +{L"uArr", 8657, 0}, +{L"uHar", 10595, 0}, +{L"uacute", 250, 0}, +{L"uarr", 8593, 0}, +{L"ubrcy", 1118, 0}, +{L"ubreve", 365, 0}, +{L"ucirc", 251, 0}, +{L"ucy", 1091, 0}, +{L"udarr", 8645, 0}, +{L"udblac", 369, 0}, +{L"udhar", 10606, 0}, +{L"ufisht", 10622, 0}, +{L"ufr", 120114, 0}, +{L"ugrave", 249, 0}, +{L"uharl", 8639, 0}, +{L"uharr", 8638, 0}, +{L"uhblk", 9600, 0}, +{L"ulcorn", 8988, 0}, +{L"ulcorner", 8988, 0}, +{L"ulcrop", 8975, 0}, +{L"ultri", 9720, 0}, +{L"umacr", 363, 0}, +{L"uml", 168, 0}, +{L"uogon", 371, 0}, +{L"uopf", 120166, 0}, +{L"uparrow", 8593, 0}, +{L"updownarrow", 8597, 0}, +{L"upharpoonleft", 8639, 0}, +{L"upharpoonright", 8638, 0}, +{L"uplus", 8846, 0}, +{L"upsi", 965, 0}, +{L"upsih", 978, 0}, +{L"upsilon", 965, 0}, +{L"upuparrows", 8648, 0}, +{L"urcorn", 8989, 0}, +{L"urcorner", 8989, 0}, +{L"urcrop", 8974, 0}, +{L"uring", 367, 0}, +{L"urtri", 9721, 0}, +{L"uscr", 120010, 0}, +{L"utdot", 8944, 0}, +{L"utilde", 361, 0}, +{L"utri", 9653, 0}, +{L"utrif", 9652, 0}, +{L"uuarr", 8648, 0}, +{L"uuml", 252, 0}, +{L"uwangle", 10663, 0}, +{L"vArr", 8661, 0}, +{L"vBar", 10984, 0}, +{L"vBarv", 10985, 0}, +{L"vDash", 8872, 0}, +{L"vangrt", 
10652, 0}, +{L"varepsilon", 1013, 0}, +{L"varkappa", 1008, 0}, +{L"varnothing", 8709, 0}, +{L"varphi", 981, 0}, +{L"varpi", 982, 0}, +{L"varpropto", 8733, 0}, +{L"varr", 8597, 0}, +{L"varrho", 1009, 0}, +{L"varsigma", 962, 0}, +{L"varsubsetneq", 8842, 65024}, +{L"varsubsetneqq", 10955, 65024}, +{L"varsupsetneq", 8843, 65024}, +{L"varsupsetneqq", 10956, 65024}, +{L"vartheta", 977, 0}, +{L"vartriangleleft", 8882, 0}, +{L"vartriangleright", 8883, 0}, +{L"vcy", 1074, 0}, +{L"vdash", 8866, 0}, +{L"vee", 8744, 0}, +{L"veebar", 8891, 0}, +{L"veeeq", 8794, 0}, +{L"vellip", 8942, 0}, +{L"verbar", 124, 0}, +{L"vert", 124, 0}, +{L"vfr", 120115, 0}, +{L"vltri", 8882, 0}, +{L"vnsub", 8834, 8402}, +{L"vnsup", 8835, 8402}, +{L"vopf", 120167, 0}, +{L"vprop", 8733, 0}, +{L"vrtri", 8883, 0}, +{L"vscr", 120011, 0}, +{L"vsubnE", 10955, 65024}, +{L"vsubne", 8842, 65024}, +{L"vsupnE", 10956, 65024}, +{L"vsupne", 8843, 65024}, +{L"vzigzag", 10650, 0}, +{L"wcirc", 373, 0}, +{L"wedbar", 10847, 0}, +{L"wedge", 8743, 0}, +{L"wedgeq", 8793, 0}, +{L"weierp", 8472, 0}, +{L"wfr", 120116, 0}, +{L"wopf", 120168, 0}, +{L"wp", 8472, 0}, +{L"wr", 8768, 0}, +{L"wreath", 8768, 0}, +{L"wscr", 120012, 0}, +{L"xcap", 8898, 0}, +{L"xcirc", 9711, 0}, +{L"xcup", 8899, 0}, +{L"xdtri", 9661, 0}, +{L"xfr", 120117, 0}, +{L"xhArr", 10234, 0}, +{L"xharr", 10231, 0}, +{L"xi", 958, 0}, +{L"xlArr", 10232, 0}, +{L"xlarr", 10229, 0}, +{L"xmap", 10236, 0}, +{L"xnis", 8955, 0}, +{L"xodot", 10752, 0}, +{L"xopf", 120169, 0}, +{L"xoplus", 10753, 0}, +{L"xotime", 10754, 0}, +{L"xrArr", 10233, 0}, +{L"xrarr", 10230, 0}, +{L"xscr", 120013, 0}, +{L"xsqcup", 10758, 0}, +{L"xuplus", 10756, 0}, +{L"xutri", 9651, 0}, +{L"xvee", 8897, 0}, +{L"xwedge", 8896, 0}, +{L"yacute", 253, 0}, +{L"yacy", 1103, 0}, +{L"ycirc", 375, 0}, +{L"ycy", 1099, 0}, +{L"yen", 165, 0}, +{L"yfr", 120118, 0}, +{L"yicy", 1111, 0}, +{L"yopf", 120170, 0}, +{L"yscr", 120014, 0}, +{L"yucy", 1102, 0}, +{L"yuml", 255, 0}, +{L"zacute", 378, 0}, +{L"zcaron", 382, 0}, 
+{L"zcy", 1079, 0}, +{L"zdot", 380, 0}, +{L"zeetrf", 8488, 0}, +{L"zeta", 950, 0}, +{L"zfr", 120119, 0}, +{L"zhcy", 1078, 0}, +{L"zigrarr", 8669, 0}, +{L"zopf", 120171, 0}, +{L"zscr", 120015, 0}, +{L"zwj", 8205, 0}, +{L"zwnj", 8204, 0}, {NULL, 0}, }; @@ -3528,12 +5404,38 @@ int PreProcessXmlString(lChar16 * str, int len, lUInt32 flags, const lChar16 * e entname[k] = 0; int n; lChar16 code = 0; - // TODO: optimize search + lChar16 code2 = 0; if ( str[i+k]==';' || str[i+k]==' ' ) { - for ( n=0; def_entity_table[n].name; n++ ) { - if ( !lStr_cmp( def_entity_table[n].name, entname ) ) { - code = def_entity_table[n].code; - break; + // Nb of iterations for some classic named entities: + // nbsp: 5 - amp: 7 - lt: 8 - quot: 9 + // apos gt shy eacute 10 + // Let's have some early straight comparisons for the ones we + // have a chance to find in huge quantities in some documents. + if ( !lStr_cmp( entname, L"nbsp" ) ) + code = 160; + else if ( !lStr_cmp( entname, L"shy" ) ) + code = 173; + else { + // Binary search (usually takes 5 to 12 iterations) + int left = 0; + int right = sizeof(def_entity_table) / sizeof((def_entity_table)[0]) - 1; // ignore last NULL + int middle; + int iters = 0; + while ( left < right ) { + iters++; + middle = (left + right) / 2; + int res = lStr_cmp( entname, def_entity_table[middle].name ); + if ( res == 0 ) { + code = def_entity_table[middle].code; + code2 = def_entity_table[middle].code2; + break; + } + else if ( res < 0 ) { + right = middle; + } + else { + left = middle + 1; + } } } } @@ -3543,6 +5445,11 @@ int PreProcessXmlString(lChar16 * str, int len, lUInt32 flags, const lChar16 * e if ( enc_table && code<256 && code>=128 ) code = enc_table[code - 128]; str[j++] = code; + if ( code2 ) { + if ( enc_table && code2<256 && code2>=128 ) + code2 = enc_table[code2 - 128]; + str[j++] = code2; + } nsp = 0; } else { // include & and rest of entity into output string @@ -4040,235 +5947,192 @@ static const signed char base64_decode_table[] = { 
41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1 //112..127 70 }; -#define BASE64_BUF_SIZE 128 -class LVBase64Stream : public LVNamedStream +int LVBase64Stream::readNextBytes() { -private: - lString8 m_curr_text; - int m_text_pos; - lvsize_t m_size; - lvpos_t m_pos; - - int m_iteration; - lUInt32 m_value; - - lUInt8 m_bytes[BASE64_BUF_SIZE]; - int m_bytes_count; - int m_bytes_pos; - - int readNextBytes() + int bytesRead = 0; + bool flgEof = false; + while ( bytesRead == 0 && !flgEof ) { - int bytesRead = 0; - bool flgEof = false; - while ( bytesRead == 0 && !flgEof ) + while ( m_text_pos >= (int)m_curr_text.length() ) { - while ( m_text_pos >= (int)m_curr_text.length() ) - { - return bytesRead; - } - int len = m_curr_text.length(); - const lChar8 * txt = m_curr_text.c_str(); - for ( ; m_text_pos>4) & 0xFF); - bytesRead++; - } - else if ( m_iteration == 3 ) - { - m_bytes[m_bytes_count++] = (lUInt8)((m_value>>10) & 0xFF); - m_bytes[m_bytes_count++] = (lUInt8)((m_value>>2) & 0xFF); - bytesRead += 2; - } - // stop!!! - //m_text_pos--; - m_iteration = 0; - flgEof = true; - break; + m_bytes[m_bytes_count++] = (lUInt8)((m_value>>4) & 0xFF); + bytesRead++; } - else + else if ( m_iteration == 3 ) { - int k = base64_decode_table[ch]; - if ( !(k & 0x80) ) { - // next base-64 digit - m_value = (m_value << 6) | (k); - m_iteration++; - if (m_iteration==4) - { - // - m_bytes[m_bytes_count++] = (lUInt8)((m_value>>16) & 0xFF); - m_bytes[m_bytes_count++] = (lUInt8)((m_value>>8) & 0xFF); - m_bytes[m_bytes_count++] = (lUInt8)((m_value>>0) & 0xFF); - m_iteration = 0; - m_value = 0; - bytesRead+=3; - } - } else { - //m_text_pos++; + m_bytes[m_bytes_count++] = (lUInt8)((m_value>>10) & 0xFF); + m_bytes[m_bytes_count++] = (lUInt8)((m_value>>2) & 0xFF); + bytesRead += 2; + } + // stop!!! 
+ //m_text_pos--; + m_iteration = 0; + flgEof = true; + break; + } + else + { + int k = base64_decode_table[ch]; + if ( !(k & 0x80) ) { + // next base-64 digit + m_value = (m_value << 6) | (k); + m_iteration++; + if (m_iteration==4) + { + // + m_bytes[m_bytes_count++] = (lUInt8)((m_value>>16) & 0xFF); + m_bytes[m_bytes_count++] = (lUInt8)((m_value>>8) & 0xFF); + m_bytes[m_bytes_count++] = (lUInt8)((m_value>>0) & 0xFF); + m_iteration = 0; + m_value = 0; + bytesRead+=3; } + } else { + //m_text_pos++; } } } } - return bytesRead; } + return bytesRead; +} - int bytesAvailable() { return m_bytes_count - m_bytes_pos; } +int LVBase64Stream::bytesAvailable() +{ + return m_bytes_count - m_bytes_pos; +} - bool rewind() - { - m_pos = 0; - m_bytes_count = 0; - m_bytes_pos = 0; - m_iteration = 0; - m_value = 0; - m_text_pos = 0; - return m_text_pos < m_curr_text.length(); - } +bool LVBase64Stream::rewind() +{ + m_pos = 0; + m_bytes_count = 0; + m_bytes_pos = 0; + m_iteration = 0; + m_value = 0; + m_text_pos = 0; + return m_text_pos < m_curr_text.length(); +} - bool skip( lvsize_t count ) +bool LVBase64Stream::skip( lvsize_t count ) +{ + while ( count ) { - while ( count ) + if ( m_bytes_pos >= m_bytes_count ) { - if ( m_bytes_pos >= m_bytes_count ) - { - m_bytes_pos = 0; - m_bytes_count = 0; - int bytesRead = readNextBytes(); - if ( bytesRead == 0 ) - return false; - } - int diff = (int) (m_bytes_count - m_bytes_pos); - if (diff > (int)count) - diff = (int)count; - m_pos += diff; - count -= diff; - } - return true; - } - -public: - virtual ~LVBase64Stream() { } - LVBase64Stream(lString8 data) - : m_curr_text(data), m_size(0), m_pos(0) - { - // calculate size - rewind(); - m_size = bytesAvailable(); - for (;;) { - int bytesRead = readNextBytes(); - if ( !bytesRead ) - break; - m_bytes_count = 0; m_bytes_pos = 0; - m_size += bytesRead; + m_bytes_count = 0; + int bytesRead = readNextBytes(); + if ( bytesRead == 0 ) + return false; } - // rewind - rewind(); - } - virtual bool Eof() 
- { - return m_pos >= m_size; - } - virtual lvsize_t GetSize() - { - return m_size; + int diff = (int) (m_bytes_count - m_bytes_pos); + if (diff > (int)count) + diff = (int)count; + m_pos += diff; + count -= diff; } + return true; +} - virtual lvpos_t GetPos() - { - return m_pos; +LVBase64Stream::LVBase64Stream(lString8 data) + : m_curr_text(data), m_size(0), m_pos(0) +{ + // calculate size + rewind(); + m_size = bytesAvailable(); + for (;;) { + int bytesRead = readNextBytes(); + if ( !bytesRead ) + break; + m_bytes_count = 0; + m_bytes_pos = 0; + m_size += bytesRead; } + // rewind + rewind(); +} - virtual lverror_t GetPos( lvpos_t * pos ) - { - if (pos) - *pos = m_pos; - return LVERR_OK; +lverror_t LVBase64Stream::Seek(lvoffset_t offset, lvseek_origin_t origin, lvpos_t* newPos) +{ + lvpos_t npos = 0; + lvpos_t currpos = GetPos(); + switch (origin) { + case LVSEEK_SET: + npos = offset; + break; + case LVSEEK_CUR: + npos = currpos + offset; + break; + case LVSEEK_END: + npos = m_size + offset; + break; } - - virtual lverror_t Seek(lvoffset_t offset, lvseek_origin_t origin, lvpos_t* newPos) + if (npos > m_size) + return LVERR_FAIL; + if ( npos != currpos ) { - lvpos_t npos = 0; - lvpos_t currpos = GetPos(); - switch (origin) { - case LVSEEK_SET: - npos = offset; - break; - case LVSEEK_CUR: - npos = currpos + offset; - break; - case LVSEEK_END: - npos = m_size + offset; - break; + if (npos < currpos) + { + if ( !rewind() || !skip(npos) ) + return LVERR_FAIL; } - if (npos > m_size) - return LVERR_FAIL; - if ( npos != currpos ) + else { - if (npos < currpos) - { - if ( !rewind() || !skip(npos) ) - return LVERR_FAIL; - } - else - { - skip( npos - currpos ); - } + skip( npos - currpos ); } - if (newPos) - *newPos = npos; - return LVERR_OK; - } - virtual lverror_t Write(const void*, lvsize_t, lvsize_t*) - { - return LVERR_NOTIMPL; } - virtual lverror_t Read(void* buf, lvsize_t size, lvsize_t* pBytesRead) - { - lvsize_t bytesRead = 0; - //fprintf( stderr, "Read()\n" ); + if 
(newPos) + *newPos = npos; + return LVERR_OK; +} + +lverror_t LVBase64Stream::Read(void* buf, lvsize_t size, lvsize_t* pBytesRead) +{ + lvsize_t bytesRead = 0; + //fprintf( stderr, "Read()\n" ); - lUInt8 * out = (lUInt8 *)buf; + lUInt8 * out = (lUInt8 *)buf; - while (size>0) - { - int sz = bytesAvailable(); + while (size>0) + { + int sz = bytesAvailable(); + if (!sz) { + m_bytes_pos = m_bytes_count = 0; + sz = readNextBytes(); if (!sz) { - m_bytes_pos = m_bytes_count = 0; - sz = readNextBytes(); - if (!sz) { - if ( !bytesRead || m_pos!=m_size) // - return LVERR_FAIL; - break; - } + if ( !bytesRead || m_pos!=m_size) // + return LVERR_FAIL; + break; } - if (sz>(int)size) - sz = (int)size; - for (int i=0; i(int)size) + sz = (int)size; + for (int i=0; i TextLangMan::_lang_cfg_list; + +bool TextLangMan::_hyphenation_enabled = TEXTLANG_DEFAULT_HYPHENATION_ENABLED; +bool TextLangMan::_hyphenation_soft_hyphens_only = TEXTLANG_DEFAULT_HYPH_SOFT_HYPHENS_ONLY; +bool TextLangMan::_hyphenation_force_algorithmic = TEXTLANG_DEFAULT_HYPH_FORCE_ALGORITHMIC; +bool TextLangMan::_overridden_hyph_method = !TEXTLANG_DEFAULT_HYPHENATION_ENABLED + || TEXTLANG_DEFAULT_HYPH_SOFT_HYPHENS_ONLY + || TEXTLANG_DEFAULT_HYPH_FORCE_ALGORITHMIC ; +// These will be set when we can +HyphMethod * TextLangMan::_no_hyph_method = NULL; +HyphMethod * TextLangMan::_algo_hyph_method = NULL; +HyphMethod * TextLangMan::_soft_hyphens_method = NULL; + +TextLangMan::TextLangMan() { +} + +TextLangMan::~TextLangMan() { +} + +lUInt32 TextLangMan::getHash() { + lUInt32 hash = _main_lang.getHash(); + hash = hash << 4; + hash = hash + (_embedded_langs_enabled << 3); + hash = hash + (_hyphenation_soft_hyphens_only << 2); + hash = hash + (_hyphenation_force_algorithmic << 1); + hash = hash + _hyphenation_enabled; + // printf("TextLangMan::getHash %x\n", hash); + return hash; +} + +// No need to explicitely call this in frontend code. +// Calling HyphMan::uninit() will have this one called. 
+void TextLangMan::uninit() { + _lang_cfg_list.clear(); +} + +// For HyphMan legacy methods +void TextLangMan::setMainLangFromHyphDict( lString16 id ) { + // When setting up TextlangMan thru HyphMan legacy methods, + // disable embedded langs, for a consistent hyphenation. + TextLangMan::setEmbeddedLangsEnabled( false ); + // Update flags if asked for @none, @softhyphens or @algorithm + TextLangMan::setHyphenationEnabled( id != HYPH_DICT_ID_NONE ); + TextLangMan::setHyphenationSoftHyphensOnly( id == HYPH_DICT_ID_SOFTHYPHENS ); + TextLangMan::setHyphenationForceAlgorithmic( id == HYPH_DICT_ID_ALGORITHM ); + + for (int i=0; _hyph_dict_table[i].lang_tag!=NULL; i++) { + if ( id.startsWith( _hyph_dict_table[i].hyph_filename_prefix ) ) { + TextLangMan::setMainLang( lString16(_hyph_dict_table[i].lang_tag) ); + #ifdef DEBUG_LANG_USAGE + printf("TextLangMan::setMainLangFromHyphDict %s => %s\n", + UnicodeToLocal(id).c_str(), UnicodeToLocal(TextLangMan::getMainLang()).c_str()); + #endif + return; + } + } + printf("CRE WARNING: lang not found for hyphenation dict: %s\n", UnicodeToLocal(id).c_str()); +} + +// Used only by TextLangCfg +HyphMethod * TextLangMan::getHyphMethodForLang( lString16 lang_tag ) { + // Look for full lang_tag + for (int i=0; _hyph_dict_table[i].lang_tag!=NULL; i++) { + if ( lang_tag == lString16(_hyph_dict_table[i].lang_tag).lowercase() ) { + return HyphMan::getHyphMethodForDictionary( lString16(_hyph_dict_table[i].hyph_filename), + _hyph_dict_table[i].left_hyphen_min, _hyph_dict_table[i].right_hyphen_min); + } + } + // Look for lang_tag initial subpart + int m_pos = lang_tag.pos("-"); + if ( m_pos > 0 ) { + lString16 lang_tag2 = lang_tag.substr(0, m_pos); + for (int i=0; _hyph_dict_table[i].lang_tag!=NULL; i++) { + if ( lang_tag2 == lString16(_hyph_dict_table[i].lang_tag).lowercase() ) { + return HyphMan::getHyphMethodForDictionary( lString16(_hyph_dict_table[i].hyph_filename), + _hyph_dict_table[i].left_hyphen_min, _hyph_dict_table[i].right_hyphen_min); 
+ } + } + } + // Fallback to English_US, as other languages are more likely to get mixed + // with english text (it feels better than using @algorithm) + return HyphMan::getHyphMethodForDictionary(TEXTLANG_FALLBACK_HYPH_DICT_ID); + +} + +// Return the (single and cached) TextLangCfg for the provided lang_tag +TextLangCfg * TextLangMan::getTextLangCfg( lString16 lang_tag ) { + if ( !_embedded_langs_enabled ) { + // Drop provided lang_tag: always return main lang TextLangCfg + lang_tag = _main_lang; + } + // Not sure if we can lowercase lang_tag and avoid duplicate (Harfbuzz might + // need the proper lang tag with some parts starting with some uppercase letter) + for ( int i=0; i<_lang_cfg_list.length(); i++ ) { + if ( _lang_cfg_list[i]->_lang_tag == lang_tag ) { + // printf("TextLangCfg %s reused\n", UnicodeToLocal(lang_tag).c_str()); + // There should rarely be more than 3 lang in a document, so move + // any requested far down in the list at top to shorten next loops. + if ( i > 2 ) { + _lang_cfg_list.move(0, i); + return _lang_cfg_list[0]; + } + return _lang_cfg_list[i]; + } + } + // Not found in cache: create it + TextLangCfg * lang_cfg = new TextLangCfg( lang_tag ); + _lang_cfg_list.add( lang_cfg ); // and cache it + return lang_cfg; +} + +TextLangCfg * TextLangMan::getTextLangCfg() { + // No lang_tag specified: return main lang one + return TextLangMan::getTextLangCfg( _main_lang ); +} + +TextLangCfg * TextLangMan::getTextLangCfg( ldomNode * node ) { + if ( !_embedded_langs_enabled || !node ) { + // No need to look at nodes: return main lang one + return TextLangMan::getTextLangCfg( _main_lang ); + } + if ( node->isText() ) + node = node->getParentNode(); + // We are usually called from renderFinalBlock() with a node that + // we know has a lang= attribute. + // But we may be called in other contexts (e.g. writeNodeEx) with + // any node: so, look at this node parents for that lang= attribute. 
+ for ( ; !node->isRoot(); node = node->getParentNode() ) { + if ( node->hasAttribute( attr_lang ) ) { + lString16 lang_tag = node->getAttributeValue( attr_lang ); + if ( !lang_tag.empty() ) + return TextLangMan::getTextLangCfg( lang_tag ); + } + } + // No parent with lang= attribute: return main lang one + return TextLangMan::getTextLangCfg( _main_lang ); +} + +int TextLangMan::getLangNodeIndex( ldomNode * node ) { + if ( !_embedded_langs_enabled || !node ) { + // No need to look up if !_embedded_langs_enabled + return 0; + } + if ( node->isText() ) + node = node->getParentNode(); + for ( ; !node->isRoot(); node = node->getParentNode() ) { + if ( node->hasAttribute( attr_lang ) ) { + if ( !node->getAttributeValue( attr_lang ).empty() ) { + return node->getDataIndex(); + } + } + } + return 0; +} + +// For HyphMan::hyphenate() +HyphMethod * TextLangMan::getMainLangHyphMethod() { + return getTextLangCfg()->getHyphMethod(); +} + + +// TextLangCfg object: per language holder of language specificities + +#if USE_LIBUNIBREAK==1 +lChar16 lb_char_sub_func_polish(const lChar16 * text, int pos, int next_usable) { + // https://github.com/koreader/koreader/issues/5645#issuecomment-559193057 + // Letters aiouwzAIOUWS are prepositions that should not be left at the + // end of a line. + // Make them behave (for libunibreak) just like a opening paren (which + // being LBC_OP, will prevent a line break after it, even if followed + // by a space). 
+ if ( pos >= 1 && text[pos-1] == ' ' ) { + switch ( text[pos] ) { + case 'A': + case 'I': + case 'O': + case 'U': + case 'W': + case 'Z': // Meaning in english: + case 'a': // and + case 'i': // and + case 'o': // about + case 'u': // at + case 'w': // in + case 'z': // with + return '('; + break; + default: + break; + } + } + return text[pos]; +} + +lChar16 lb_char_sub_func_czech_slovak(const lChar16 * text, int pos, int next_usable) { + // Same for Czech and Slovak : AIiVvOoUuSsZzKk + // https://tex.stackexchange.com/questions/27780/one-letter-word-at-the-end-of-line + // https://github.com/michal-h21/luavlna + if ( pos >= 1 && text[pos-1] == ' ' ) { + switch ( text[pos] ) { + case 'A': + case 'I': + case 'K': + case 'O': + case 'S': + case 'U': + case 'V': + case 'Z': + case 'i': + case 'k': + case 'o': + case 's': + case 'u': + case 'v': + case 'z': + return '('; + break; + default: + break; + } + } + return text[pos]; +} +#endif + +TextLangCfg::~TextLangCfg() { +} + +// Instantiate a new TextLangCfg with properties adequate to the provided lang_tag +TextLangCfg::TextLangCfg( lString16 lang_tag ) { + if ( TextLangMan::_no_hyph_method == NULL ) { + // We need to init static TextLangMan::_no_hyph_method and friends after + // HyphMan is set up. Do that here, even if unrelated, as TextLangCfg + // creation is called less often that every other methods around here. + TextLangMan::_no_hyph_method = HyphMan::getHyphMethodForDictionary(HYPH_DICT_ID_NONE); + TextLangMan::_soft_hyphens_method = HyphMan::getHyphMethodForDictionary(HYPH_DICT_ID_SOFTHYPHENS); + TextLangMan::_algo_hyph_method = HyphMan::getHyphMethodForDictionary(HYPH_DICT_ID_ALGORITHM); + } + + // Keep as our id the provided and non-lowercase'd lang_tag (with possibly bogus #@algorithm) + _lang_tag = lang_tag; + // Harfbuzz may know more than us about exotic/complex lang tags, + // so let it deal the the provided one as-is. 
+ lString16 hb_lang_tag = lang_tag; + // Lowercase it for our tests + lang_tag.lowercase(); // (used by LANG_STARTS_WITH() macros) + + // Get hyph method/dictionary from _hyph_dict_table + _hyph_method = TextLangMan::getHyphMethodForLang(lang_tag); + + // Cleanup if we got "en#@something" from legacy HyphMan methods + int h_pos = lang_tag.pos("#"); + if ( h_pos > 0 ) { + lang_tag = lang_tag.substr(0, h_pos); + hb_lang_tag = hb_lang_tag.substr(0, h_pos); // Also clean the one for HB + } + #ifdef DEBUG_LANG_USAGE + printf("TextLangCfg %s created (%s %s)\n", UnicodeToLocal(_lang_tag).c_str(), + UnicodeToLocal(lang_tag).c_str(), UnicodeToLocal(_hyph_method->getId()).c_str()); + #endif + + // https://drafts.csswg.org/css-text-3/#script-tagging + // We might need to check for the script subpart (optional 2nd + // subpart) Lant, Hant, Hrkt... and make some non latin language + // with a Lant script behave more like latin languages... + + // Note that Harfbuzz seems to do the right same thing with + // either "zh-TW" and "zh-Hant". + + // See for more clever/complex handling of lang tags: + // https://android.googlesource.com/platform/frameworks/minikin/+/refs/heads/master/libs/minikin/Locale.cpp + + // We thought about adding a 2nd fallback font per-language, but it feels + // a bit wrong to limit this feature to documents with lang tags. + // Better to implement a generic font fallback chain independant of language. + + // https://unicode.org/reports/tr14/#Hyphen : in Polish and Portuguese, + // a real hyphen at end of line must be duplicated at start of next line. + _duplicate_real_hyphen_on_next_line = false; + +#if USE_HARFBUZZ==1 + _hb_language = hb_language_from_string(UnicodeToLocal(hb_lang_tag).c_str(), -1); +#endif + +#if USE_LIBUNIBREAK==1 + // libunibreak per-language LineBreakProperties extensions + // + // Rules extracted from libunibreak/src/linebreakdef.c, so we can adapt + // them and build LineBreakProperties adequately for more languages. 
+ // See https://en.wikipedia.org/wiki/Quotation_mark + // These are mostly need only for languages that may add a space between + // the quote and its content - otherwise, the quote will be part of the + // word it sticks to, and break will be allowed on the other side which + // probably is a space. + // When a language allows the use of unpaired quotes (same quote on both + // sides), it seems best to not specify anything. + bool has_left_single_quotation_mark_opening = false; // U+2018 ‘ + bool has_left_single_quotation_mark_closing = false; + bool has_right_single_quotation_mark_opening = false; // U+2019 ’ + bool has_right_single_quotation_mark_closing = false; + bool has_right_single_quotation_mark_glue = false; + bool has_left_double_quotation_mark_opening = false; // U+201C “ + bool has_left_double_quotation_mark_closing = false; + bool has_right_double_quotation_mark_opening = false; // U+201D ” + bool has_right_double_quotation_mark_closing = false; + bool has_left_single_angle_quotation_mark_opening = false; // U+2039 ‹ + bool has_left_single_angle_quotation_mark_closing = false; + bool has_right_single_angle_quotation_mark_opening = false; // U+203A › + bool has_right_single_angle_quotation_mark_closing = false; + bool has_left_double_angle_quotation_mark_opening = false; // U+00AB « + bool has_left_double_angle_quotation_mark_closing = false; + bool has_right_double_angle_quotation_mark_opening = false; // U+00BB » + bool has_right_double_angle_quotation_mark_closing = false; + + // Note: these macros use 'lang_tag'. 
+ if ( LANG_STARTS_WITH(("en")) ) { // English + has_left_single_quotation_mark_opening = true; // no right..closing in linebreakdef.c + has_left_double_quotation_mark_opening = true; + has_right_double_quotation_mark_closing = true; + } + else if ( LANG_STARTS_WITH(("fr") ("es")) ) { // French, Spanish + has_left_single_quotation_mark_opening = true; // no right..closing in linebreakdef.c + has_left_double_quotation_mark_opening = true; + has_right_double_quotation_mark_closing = true; + has_left_single_angle_quotation_mark_opening = true; + has_right_single_angle_quotation_mark_closing = true; + has_left_double_angle_quotation_mark_opening = true; + has_right_double_angle_quotation_mark_closing = true; + } + else if ( LANG_STARTS_WITH(("de")) ) { // German + has_left_single_quotation_mark_closing = true; + has_right_single_quotation_mark_glue = true; + has_left_double_quotation_mark_closing = true; + has_left_single_angle_quotation_mark_closing = true; + has_right_single_angle_quotation_mark_opening = true; + has_left_double_angle_quotation_mark_closing = true; + has_right_double_angle_quotation_mark_opening = true; + } + else if ( LANG_STARTS_WITH(("ru")) ) { // Russian + has_left_double_quotation_mark_closing = true; + has_left_double_angle_quotation_mark_opening = true; + has_right_double_angle_quotation_mark_closing = true; + } + else if ( LANG_STARTS_WITH(("zh")) ) { // Chinese + has_left_single_quotation_mark_opening = true; + has_right_single_quotation_mark_closing = true; + has_left_double_quotation_mark_opening = true; + has_right_double_quotation_mark_closing = true; + } + // Add languages rules here, or reuse previous one with other languages if needed. + + // Set up _lb_props. 
+ // Important: the unicode indices must be in strict ascending order (or libunibreak + // might abort checking them all) + int n = 0; + if ( has_left_double_angle_quotation_mark_opening ) _lb_props[n++] = { 0x00AB, 0x00AB, LBP_OP }; + if ( has_left_double_angle_quotation_mark_closing ) _lb_props[n++] = { 0x00AB, 0x00AB, LBP_CL }; + // Soft-Hyphens are handled by Hyphman hyphenate(), have them handled as Zero-Width-Joiner by + // libunibreak so they don't allow any break and don't prevent hyphenate() to handle them correctly. + _lb_props[n++] = { 0x00AD, 0x00AD, LBP_ZWJ }; + if ( has_right_double_angle_quotation_mark_opening ) _lb_props[n++] = { 0x00BB, 0x00BB, LBP_OP }; + if ( has_right_double_angle_quotation_mark_closing ) _lb_props[n++] = { 0x00BB, 0x00BB, LBP_CL }; + if ( has_left_single_quotation_mark_opening ) _lb_props[n++] = { 0x2018, 0x2018, LBP_OP }; + if ( has_left_single_quotation_mark_closing ) _lb_props[n++] = { 0x2018, 0x2018, LBP_CL }; + if ( has_right_single_quotation_mark_opening ) _lb_props[n++] = { 0x2019, 0x2019, LBP_OP }; + if ( has_right_single_quotation_mark_closing ) _lb_props[n++] = { 0x2019, 0x2019, LBP_CL }; + if ( has_right_single_quotation_mark_glue ) _lb_props[n++] = { 0x2019, 0x2019, LBP_GL }; + if ( has_left_double_quotation_mark_opening ) _lb_props[n++] = { 0x201C, 0x201C, LBP_OP }; + if ( has_left_double_quotation_mark_closing ) _lb_props[n++] = { 0x201C, 0x201C, LBP_CL }; + if ( has_right_double_quotation_mark_opening ) _lb_props[n++] = { 0x201D, 0x201D, LBP_OP }; + if ( has_right_double_quotation_mark_closing ) _lb_props[n++] = { 0x201D, 0x201D, LBP_CL }; + if ( has_left_single_angle_quotation_mark_opening ) _lb_props[n++] = { 0x2039, 0x2039, LBP_OP }; + if ( has_left_single_angle_quotation_mark_closing ) _lb_props[n++] = { 0x2039, 0x2039, LBP_CL }; + if ( has_right_single_angle_quotation_mark_opening ) _lb_props[n++] = { 0x203A, 0x203A, LBP_OP }; + if ( has_right_single_angle_quotation_mark_closing ) _lb_props[n++] = { 0x203A, 
0x203A, LBP_CL }; + // End of list + _lb_props[n++] = { 0, 0, LBP_Undefined }; + // Done with libunibreak per-language LineBreakProperties extensions + + // Other line breaking and text layout tweaks + _lb_char_sub_func = NULL; + if ( LANG_STARTS_WITH(("pl")) ) { // Polish + _lb_char_sub_func = &lb_char_sub_func_polish; + _duplicate_real_hyphen_on_next_line = true; + } + if ( LANG_STARTS_WITH(("cs") ("sk")) ) { // Czech, Slovak + _lb_char_sub_func = &lb_char_sub_func_czech_slovak; + } + if ( LANG_STARTS_WITH(("pt")) ) { // Portuguese + _duplicate_real_hyphen_on_next_line = true; + } +#endif +}