diff --git a/Hyphenopoly.js b/Hyphenopoly.js index e39498f8..6b45ed29 100644 --- a/Hyphenopoly.js +++ b/Hyphenopoly.js @@ -1,5 +1,5 @@ /** - * @license Hyphenopoly 2.4.0 - client side hyphenation for webbrowsers + * @license Hyphenopoly 2.5.0 - client side hyphenation for webbrowsers * ©2018 Mathias Nater, Zürich (mathiasnater at gmail dot com) * https://github.com/mnater/Hyphenopoly * @@ -428,23 +428,18 @@ } /** - * Hyphenate text in element - * @param {string} lang The language of the element - * @param {Object} elo The element-object - * @returns {undefined} + * Hyphenate an entitiy (text string or Element-Object) + * @param {string} lang - the language of the string + * @param {string} cn - the class of settings + * @param {string} entity - the entity to be hyphenated + * @returns {string | null} hyphenated string according to setting of cn */ - function hyphenateElement(lang, elo) { - const el = elo.element; + function hyphenate(lang, cn, entity) { const lo = H.languages[lang]; - const cn = elo.class; const classSettings = C[cn]; const minWordLength = classSettings.minWordLength; const normalize = C.normalize && Boolean(String.prototype.normalize); - H.events.dispatch("beforeElementHyphenation", { - "el": el, - "lang": lang - }); const poolKey = lang + "-" + cn; const wordHyphenator = (wordHyphenatorPool[poolKey]) ? wordHyphenatorPool[poolKey] @@ -453,37 +448,72 @@ ? orphanControllerPool[cn] : createOrphanController(cn); const re = lo.genRegExps[cn]; - let i = 0; - let n = el.childNodes[i]; - while (n) { - if ( - n.nodeType === 3 && - n.data.length >= minWordLength - ) { - let tn = null; - if (normalize) { - tn = n.data.normalize("NFC").replace(re, wordHyphenator); - } else { - tn = n.data.replace(re, wordHyphenator); - } - if (classSettings.orphanControl !== 1) { - tn = tn.replace( - /(\u0020*)(\S+)(\s*)$/, - orphanController - ); + + /** + * Hyphenate text according to setting in cn + * @param {string} text - the strint to be hyphenated + * @returns {string} hyphenated string according to setting of cn + */ + function hyphenateText(text) { + let tn = null; + if (normalize) { + tn = text.normalize("NFC").replace(re, wordHyphenator); + } else { + tn = text.replace(re, wordHyphenator); + } + if (classSettings.orphanControl !== 1) { + tn = tn.replace( + /(\u0020*)(\S+)(\s*)$/, + orphanController + ); + } + return tn; + } + + /** + * Hyphenate element according to setting in cn + * @param {object} el - the HTMLElement to be hyphenated + * @returns {undefined} + */ + function hyphenateElement(el) { + H.events.dispatch("beforeElementHyphenation", { + "el": el, + "lang": lang + }); + let i = 0; + let n = el.childNodes[i]; + while (n) { + if ( + n.nodeType === 3 && + n.data.length >= minWordLength + ) { + n.data = hyphenateText(n.data); } - n.data = tn; + i += 1; + n = el.childNodes[i]; } - i += 1; - n = el.childNodes[i]; + elements.counter[0] -= 1; + H.events.dispatch("afterElementHyphenation", { + "el": el, + "lang": lang + }); } - elements.counter[0] -= 1; - H.events.dispatch("afterElementHyphenation", { - "el": el, - "lang": lang - }); + let r = null; + if (typeof entity === "string") { + r = hyphenateText(entity); + } else if (entity instanceof HTMLElement) { + hyphenateElement(entity); + } + return r; } + H.createHyphenator = function createHyphenator(lang) { + return function hyphenator(entity, cn) { + cn = cn || "hyphenate"; + return hyphenate(lang, cn, entity); + }; + }; + /** * Hyphenate all elements with a given language * @param {string} lang The language @@ -493,7 +523,7 @@ function hyphenateLangElements(lang, elArr) { if (elArr) { elArr.forEach(function eachElem(elo) { - hyphenateElement(lang, elo); + hyphenate(lang, elo.class, elo.element); }); } else { H.events.dispatch("error", {"msg": "engine for language '" + lang + "' loaded, but no elements found."}); @@ -607,6 +637,15 @@ classSettings.rightmin ); } + + /* + * Find words with characters from `alphabet` and + * `Zero Width Non-Joiner` and `-` with a min length. + * + * This regexp is not perfect. It also finds parts of words + * that follow a character that is not in the `alphabet`. + * Word delimiters are not taken in account. + */ lo.genRegExps[cn] = new RegExp("[\\w" + alphabet + String.fromCharCode(8204) + "-]{" + classSettings.minWordLength + ",}", "gi"); }); lo.engineReady = true; @@ -787,12 +826,16 @@ const defRightmin = baseData.rightmin; const hyphenatedWordStore = (new Uint16Array(heapBuffer)).subarray( hyphenatedWordOffset >> 1, - (hyphenatedWordOffset >> 1) + 64 + (hyphenatedWordOffset >> 1) + 128 ); /* eslint-enable no-bitwise */ - return function hyphenate(word, hyphenchar, leftmin, rightmin) { + return function enclHyphenate(word, hyphenchar, leftmin, rightmin) { let i = 0; const wordLength = word.length; + if (wordLength > 61) { + H.events.dispatch("error", {"msg": "found word longer than 61 characters"}); + return word; + } leftmin = leftmin || defLeftmin; rightmin = rightmin || defRightmin; wordStore[0] = wordLength + 2; diff --git a/Hyphenopoly_Loader.js b/Hyphenopoly_Loader.js index dc2d6859..d7d81c67 100644 --- a/Hyphenopoly_Loader.js +++ b/Hyphenopoly_Loader.js @@ -1,5 +1,5 @@ /** - * @license Hyphenopoly_Loader 2.4.0 - client side hyphenation + * @license Hyphenopoly_Loader 2.5.0 - client side hyphenation * ©2018 Mathias Nater, Zürich (mathiasnater at gmail dot com) * https://github.com/mnater/Hyphenopoly * @@ -525,16 +525,67 @@ ); } + /** + * Expose the hyphenate-function of a specific language to + * Hyphenopoly.hyphenators. + * + * Hyphenopoly.hyphenators. is a Promise that fullfills + * to hyphenate(lang, cn, entity) as soon as the ressources are loaded + * and the engine is ready. + * If Promises aren't supported (e.g. IE11) a error message is produced. + * + * @param {string} lang - the language + * @returns {undefined} + */ + function exposeHyphenateFunction(lang) { + if (!H.hyphenators) { + H.hyphenators = {}; + } + if (!H.hyphenators[lang]) { + if (window.Promise) { + H.hyphenators[lang] = new Promise(function pro(rs, rj) { + H.events.addListener("engineReady", function handler(e) { + if (e.msg === lang) { + rs(H.createHyphenator(e.msg)); + } + }, true); + H.events.addListener("error", function handler(e) { + e.preventDefault(); + if (e.key === lang || e.key === "hyphenEngine") { + rj(e.msg); + } + }, true); + }); + } else { + H.hyphenators[lang] = { + + /** + * Fires an error message, if then is called + * @returns {undefined} + */ + "then": function () { + H.events.dispatch( + "error", + {"msg": "Promises not supported in this engine. Use a polyfill (e.g. https://github.com/taylorhakes/promise-polyfill)!"} + ); + } + }; + } + } + } + Object.keys(H.require).forEach(function doReqLangs(lang) { if (H.require[lang] === "FORCEHYPHENOPOLY") { H.clientFeat.polyfill = true; H.clientFeat.langs[lang] = "H9Y"; loadRessources(lang); + exposeHyphenateFunction(lang); } else if ( H.clientFeat.langs[lang] && H.clientFeat.langs[lang] === "H9Y" ) { loadRessources(lang); + exposeHyphenateFunction(lang); } else { tester.createTest(lang); } @@ -550,6 +601,7 @@ H.clientFeat.polyfill = true; H.clientFeat.langs[lang] = "H9Y"; loadRessources(lang); + exposeHyphenateFunction(lang); } } }); diff --git a/example.js b/example.js index 66044778..6cda6738 100644 --- a/example.js +++ b/example.js @@ -1,5 +1,9 @@ +// For RunKit: const hyphenopoly = require("hyphenopoly"); +// For local node: +// const hyphenopoly = require("./hyphenopoly.module.js"); + const hyphenator = hyphenopoly.config({ "require": ["de", "en-us"], "hyphen": "•", diff --git a/hyphenEngine.asm.js b/hyphenEngine.asm.js index ceef1f57..13594da2 100644 --- a/hyphenEngine.asm.js +++ b/hyphenEngine.asm.js @@ -1,5 +1,5 @@ /** - * @license hyphenEngine.asm.js 2.4.0 - client side hyphenation for webbrowsers + * @license hyphenEngine.asm.js 2.5.0 - client side hyphenation for webbrowsers * ©2018 Mathias Nater, Zürich (mathiasnater at gmail dot com) * https://github.com/mnater/Hyphenopoly * diff --git a/hyphenopoly.module.js b/hyphenopoly.module.js index 5e15198c..66fed2e4 100644 --- a/hyphenopoly.module.js +++ b/hyphenopoly.module.js @@ -1,5 +1,5 @@ /** - * @license Hyphenopoly.module.js 2.4.0 - hyphenation for node + * @license Hyphenopoly.module.js 2.5.0 - hyphenation for node * ©2018 Mathias Nater, Zürich (mathiasnater at gmail dot com) * https://github.com/mnater/Hyphenopoly * @@ -12,12 +12,12 @@ "use strict"; const fs = require("fs"); -const ut = require("util"); +const {StringDecoder} = require("string_decoder"); const decode = (function makeDecoder() { - const utf16ledecoder = new (ut.TextDecoder)("utf-16le"); + const utf16ledecoder = new StringDecoder("utf-16le"); return function dec(ui16) { - return utf16ledecoder.decode(ui16); + return utf16ledecoder.write(ui16); }; }()); @@ -57,12 +57,48 @@ function setProp(val, props) { const H = empty(); H.binaries = empty(); +/** + * Read a file and call callback + * Use "fs" (node) or "http" (browser) + * @param {string} file - the filename + * @param {function} cb - callback function + * @returns {undefined} + */ +function readFile(file, cb) { + fs.readFile(file, cb); +} + +/** + * Before using browserify comment-out or delete the readFile-function above + * and un-comment the following function. + * Also change `const fs = require("fs");` to `const http = require("http");` + * at the top of the file + */ + +/** + * Browserify-compatible readFile: + * function readFile(file, cb) { + * const rawData = []; + * http.get(file, function c(res) { + * res.on("data", function onData(chunk) { + * rawData.push(chunk); + * }); + * res.on("end", function onEnd() { + * cb(null, Buffer.concat(rawData)); + * }); + * res.on("error", function onErr(err) { + * cb(err, rawData); + * }); + * }); + * } + */ + /** * Read a wasm file, dispatch "engineLoaded" on success * @returns {undefined} */ function loadWasm() { - fs.readFile( + readFile( `${H.c.paths.maindir}hyphenEngine.wasm`, function cb(err, data) { if (err) { @@ -84,7 +120,7 @@ function loadWasm() { * @returns {undefined} */ function loadHpb(lang) { - fs.readFile( + readFile( `${H.c.paths.patterndir}${lang}.hpb`, function cb(err, data) { if (err) { @@ -284,7 +320,7 @@ function encloseHyphenateFunction(baseData, hyphenateFunc) { const defRightmin = baseData.rightmin; const hyphenatedWordStore = (new Uint16Array(heapBuffer)).subarray( hyphenatedWordOffset >> 1, - (hyphenatedWordOffset >> 1) + 64 + (hyphenatedWordOffset >> 1) + 128 ); /* eslint-enable no-bitwise */ @@ -301,6 +337,13 @@ function encloseHyphenateFunction(baseData, hyphenateFunc) { return function hyphenate(word, hyphenchar, leftmin, rightmin) { let i = 0; const wordLength = word.length; + if (wordLength > 61) { + H.events.dispatch( + "error", + {"msg": "found word longer than 61 characters"} + ); + return word; + } leftmin = leftmin || defLeftmin; rightmin = rightmin || defRightmin; wordStore[0] = wordLength + 2; diff --git a/package.json b/package.json index 8a5b1fdc..a0c9d462 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "hyphenopoly", - "version": "2.4.0", + "version": "2.5.0", "description": "Hyphenation for node and Polyfill for client-side hyphenation.", "keywords": [ "hyphenation", @@ -29,7 +29,6 @@ "example.js", "example.html" ], - "browser": "Hyphenopoly_Loader.js", "main": "hyphenopoly.module.js", "engines": { "node": ">=8.3.0" @@ -154,7 +153,7 @@ "compile-binaryen": "cd ./third-party/binaryen/ && cmake . && make", "compile-wabt": "cd ./third-party/wabt/ && make", "compilewasm": "sh ./tools/compileWASM.sh hyphenEngine", - "test": "tap test/*.js", + "test": "tap test/*.js --coverage", "testsuite": "open http://127.0.0.1/~mnater/Hyphenopoly/testsuite/ && open http://127.0.0.1/~mnater/Hyphenopoly/min/testsuite/", "lint": "eslint Hyphenopoly_Loader.js Hyphenopoly.js hyphenopoly.module.js test/*.js", "prepare": "npm run minify", diff --git a/test/errors.js b/test/errors.js index 8f4807f4..95443780 100644 --- a/test/errors.js +++ b/test/errors.js @@ -76,3 +76,12 @@ t.test("make hyphenEngine fail", async function (t) { }); t.end(); }); + +t.test("fail when word is to long", async function (t) { + const nlHyphenator = await H9Y.config({"require": ["nl"]}); + t.test("hyphenate one word", function (t) { + t.equal(nlHyphenator("Kindercarnavalsoptochtvoorbereidingswerkzaamhedenplankindercarnavals"), "Kindercarnavalsoptochtvoorbereidingswerkzaamhedenplankindercarnavals"); + t.end(); + }); + t.end(); +}); diff --git a/testsuite/test25.html b/testsuite/test25.html new file mode 100644 index 00000000..c6640353 --- /dev/null +++ b/testsuite/test25.html @@ -0,0 +1,92 @@ + + + + + Test 025 + + + + + + + +

Test 025

+

Hyphenate long word

+
+
+

Kindercarnavalsoptochtvoorbereidingswerkzaamhedenplan Kindercarnavalsoptochtvoorbereidingswerkzaamhedenplankinderca Kindercarnavalsoptochtvoorbereidingswerkzaamhedenplankindercarnavals

+

Kin•der•car•na•vals•op•tocht•voor•be•rei•dings•werk•zaam•he•den•plan Kin•der•car•na•vals•op•tocht•voor•be•rei•dings•werk•zaam•he•den•plan•kin•der•ca Kindercarnavalsoptochtvoorbereidingswerkzaamhedenplankindercarnavals

+ +
+
Test Ref
+ + + \ No newline at end of file diff --git a/testsuite/test26.html b/testsuite/test26.html new file mode 100644 index 00000000..2c89e494 --- /dev/null +++ b/testsuite/test26.html @@ -0,0 +1,135 @@ + + + + + Test 026 + + + + + + + + +

Test 026

+

Hyphenate manually (uses promises): FORCE

+
+
+

Silbentrennungsalgorithmus

+

Sil•ben•tren•nungs•al•go•rith•mus

+

+

Sil•ben•tren•nung ver•bes•sert den Block•satz.

+
+
Test Ref
+ + + \ No newline at end of file diff --git a/testsuite/test27.html b/testsuite/test27.html new file mode 100644 index 00000000..b037bf28 --- /dev/null +++ b/testsuite/test27.html @@ -0,0 +1,129 @@ + + + + + Test 027 + + + + + + + +

Test 027

+

Hyphenate manually (uses promises): check

+
+
+

fur

+

+

Il fur|lan e je une len|ghe ro|man|ze de fa|mee des len|ghis re|ti|chis, che e je fe|ve|la|de so|re|dut in|tal Friûl, ma an|cje vie pal mont. Cual|chi vol|te al ven clamât La|din orientâl, parcè che al à di|viers ponts in co|mun cul la|din, ma si è svi|lupât in un al|tri mût cul pa|ssâ dal timp, sot de in|fluen|ce des len|ghis ator dal Friûl

+
+
Test Ref
+ + \ No newline at end of file diff --git a/testsuite/testdriver.js b/testsuite/testdriver.js index 7fa438a0..a9ae1d92 100644 --- a/testsuite/testdriver.js +++ b/testsuite/testdriver.js @@ -29,7 +29,10 @@ {exec: true, path: "test21.html"}, {exec: true, path: "test22.html"}, {exec: true, path: "test23.html"}, - {exec: true, path: "test24.html"} + {exec: true, path: "test24.html"}, + {exec: true, path: "test25.html"}, + {exec: true, path: "test26.html"}, + {exec: true, path: "test27.html"} ]; var testframe = document.getElementById("testframe"); var currentTest = 1; diff --git a/tools/minify.sh b/tools/minify.sh index 08873d6e..e9dd4038 100644 --- a/tools/minify.sh +++ b/tools/minify.sh @@ -8,5 +8,5 @@ terser Hyphenopoly.js -o min/Hyphenopoly.js --comments -c -m --warn terser hyphenEngine.asm.js -o min/hyphenEngine.asm.js --comments -c -m --warn --verbose cp hyphenEngine.wasm min/hyphenEngine.wasm -cp -R patterns min/patterns -cp -R testsuite min/testsuite \ No newline at end of file +cp -R patterns/ min/patterns +cp -R testsuite/ min/testsuite \ No newline at end of file diff --git a/tools/tex2hpb.js b/tools/tex2hpb.js index 00bd4734..440ecccd 100644 --- a/tools/tex2hpb.js +++ b/tools/tex2hpb.js @@ -6,7 +6,7 @@ * (https://github.com/mnater/Hyphenopoly) * * Usage: - * # node tex2hpb.js license.txt characters.txt patterns.txt [exceptions.txt] + * # node tex2hpb.js license.txt characters.txt patterns.txt [exceptions.txt | null] outname * * This creates a new file called input.hpb in pwd * @@ -54,6 +54,12 @@ * _10t10a11b10l10e10_ * _10p10r10o10j10e10c10t10_ * So "tables" will not by hyphenated by this pattern. + * + * If there's no exceptions file, use "null" as a placeholder. + * + * outname + * outname (typically the language code) is the filename where patterns will + * be stored. The .hpb ending is added automatically. */ /* Binary format: .hpb (hyphenopoly patterns binary)