From 91bd926c36f5eded5aadc4ac44d9cb761b3e6fd7 Mon Sep 17 00:00:00 2001 From: mnater Date: Fri, 7 Aug 2020 17:32:01 +0200 Subject: [PATCH 1/4] feat: prevent small words and eol Problem: Some languages (e.g. Polish) require that single-letter words do not appear at the end of a line. Solution: Implement an optional class-based setting, find small words and replace the space after them with a nbsp. This is easy if we just search for single-letter words, but gets somewhat complicated when we open it up to any number of chars because we'll have to handle overlapping regex matches. Notes: First implementation and test [skip travis-ci] --- Hyphenopoly.js | 14 ++++- testsuite/test50.html | 110 ++++++++++++++++++++++++++++++++++++++++ testsuite/testdriver.js | 3 +- 3 files changed, 125 insertions(+), 2 deletions(-) create mode 100644 testsuite/test50.html diff --git a/Hyphenopoly.js b/Hyphenopoly.js index 63590ae8..5a7ab2fc 100644 --- a/Hyphenopoly.js +++ b/Hyphenopoly.js @@ -170,7 +170,8 @@ ["mixedCase", true], ["orphanControl", 1], ["rightmin", 0], - ["rightminPerLang", 0] + ["rightminPerLang", 0], + ["smallWords", 0] ])); o.entries(selSettings).forEach( ([selSetting, setVal]) => { @@ -580,6 +581,17 @@ createOrphanController(sel) ); } + if (selSettings.smallWords > 0) { + const re = RegExp(`(?=(\\s\\S{1,${selSettings.smallWords}}\\s))`, "gu"); + const matches = []; + tn.replace(re, (orig, $1) => { + matches.push($1); + }); + matches.forEach((m) => { + tn = tn.replace(RegExp(`(\\s)${m.slice(1)}`), `$1${m.slice(1, -1)}\u00A0`); + }); + } + return tn; } diff --git a/testsuite/test50.html b/testsuite/test50.html new file mode 100644 index 00000000..8b3fdd3c --- /dev/null +++ b/testsuite/test50.html @@ -0,0 +1,110 @@ + + + + + Test 050 + + + + + + + +

Test 050

+

Do not wrap small words.

+
+
+

Yesterday I saw a brown cat in my backyard.

+

Yes­ter­day I saw a brown cat in my back­yard.

+ +

Yesterday I saw a brown cat in my backyard.

+

Yes­ter­day I saw a brown cat in my back­yard.

+ +

Yesterday I saw a brown cat in my backyard.

+

Yes­ter­day I saw a brown cat in my back­yard.

+ +

Yesterday I saw a brown cat in my backyard.

+

Yes­ter­day I saw a brown cat in my back­yard.

+ +

Yesterday I saw a brown cat in my backyard. Yesterday I saw a brown cat in my backyard.

+

Yes­ter­day I saw a brown cat in my back­yard. Yes­ter­day I saw a brown cat in my back­yard.

+
+
Test Ref
+ + + \ No newline at end of file diff --git a/testsuite/testdriver.js b/testsuite/testdriver.js index 0a596f34..1d34af0e 100644 --- a/testsuite/testdriver.js +++ b/testsuite/testdriver.js @@ -53,7 +53,8 @@ {"exec": true, "path": "test46.html"}, {"exec": true, "path": "test47.html"}, {"exec": true, "path": "test48.html"}, - {"exec": true, "path": "test49.html"} + {"exec": true, "path": "test49.html"}, + {"exec": true, "path": "test50.html"} ]; var testframe = document.getElementById("testframe"); var currentTest = 1; From 81479a0af6248b5e2646126e58473bc7f9ddef7a Mon Sep 17 00:00:00 2001 From: mnater Date: Fri, 7 Aug 2020 17:41:50 +0200 Subject: [PATCH 2/4] feat: add smallWords feat to module --- hyphenopoly.module.js | 11 +++++++++++ test/configurations.js | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/hyphenopoly.module.js b/hyphenopoly.module.js index 7d3be372..5f7836ac 100644 --- a/hyphenopoly.module.js +++ b/hyphenopoly.module.js @@ -526,6 +526,16 @@ function createTextHyphenator(lang) { orphanController ); } + if (H.c.smallWords > 0) { + const re = RegExp(`(?=(\\s\\S{1,${H.c.smallWords}}\\s))`, "gu"); + const matches = []; + tn.replace(re, (orig, $1) => { + matches.push($1); + }); + matches.forEach((m) => { + tn = tn.replace(RegExp(`(\\s)${m.slice(1)}`), `$1${m.slice(1, -1)}\u00A0`); + }); + } return tn; }); } @@ -669,6 +679,7 @@ H.config = ((userConfig) => { ["require", []], ["rightmin", 0], ["rightminPerLang", new Map()], + ["smallWords", 0], ["substitute", new Map()], ["sync", false] ])); diff --git a/test/configurations.js b/test/configurations.js index ac4b8257..632370d9 100644 --- a/test/configurations.js +++ b/test/configurations.js @@ -345,3 +345,37 @@ t.test("set options: orphanControl", function (t) { }); t.end(); }); + +t.test("set options: smallWords", function (t) { + let H9Y = null; + t.beforeEach(function setup(done) { + H9Y = require("../hyphenopoly.module"); + done(); + }); + + t.afterEach(function 
tearDown(done) { + H9Y = null; + delete require.cache[require.resolve("../hyphenopoly.module")]; + done(); + }); + + t.test("smallWords: 1", async function (t) { + const hyphenator = await H9Y.config({ + "hyphen": "•", + "require": ["en-us"], + "smallWords": 1 + }); + t.equal(hyphenator("Yesterday I saw a cat."), "Yes•ter•day I\u00A0saw a\u00A0cat."); + t.end(); + }); + t.test("smallWords: 2", async function (t) { + const hyphenator = await H9Y.config({ + "hyphen": "•", + "require": ["en-us"], + "smallWords": 2 + }); + t.equal(hyphenator("Yesterday I saw a cat in my backyard."), "Yes•ter•day I\u00A0saw a\u00A0cat in\u00A0my\u00A0back•yard."); + t.end(); + }); + t.end(); +}); From b2c93bea66ac7396a5059c6b20e4269c2cacf015 Mon Sep 17 00:00:00 2001 From: mnater Date: Fri, 7 Aug 2020 17:50:14 +0200 Subject: [PATCH 3/4] docs: add docs for smallWords [skip travis-ci] --- docs/Setup.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/docs/Setup.md b/docs/Setup.md index 68d8bb9b..44085c92 100644 --- a/docs/Setup.md +++ b/docs/Setup.md @@ -21,6 +21,7 @@ These page documents the optional fields in `setup`: * [minWordLength](#minwordlength) * [mixedCase](#mixedcase) * [orphanControl](#orphancontrol) + * [smallWords](#smallwords) ## Global Settings These settings apply to Hyphenopoly in general. @@ -524,3 +525,36 @@ There are three stages: 1. allow orphans 2. don't hyphenate the last word of an element 3. don't hyphenate the last word of an element and replace the space before with a no-breaking space + +### smallWords +```` +type: number +default: 0 +```` +Prevent small words at the end of a line by replacing the space after them with a no-breaking space (nbsp / uc: 00A0) +````html + +```` +The number defines what words are considered "small": + +`0`: no words are considered small. + +`1`: insert nbsp after single letter words (e.g. 
"I" or "a") + +`2`: insert nbsp after single and double letter words + +`3`: insert nbsp after words with 1, 2 or 3 letter + +and so on... From b252bab23a13546d3727f3db1ad3d3f2f69d24ce Mon Sep 17 00:00:00 2001 From: mnater Date: Sat, 8 Aug 2020 23:34:19 +0200 Subject: [PATCH 4/4] docs: enhance docs [skip travis-ci] --- docs/Setup.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/Setup.md b/docs/Setup.md index 44085c92..b2faeaaa 100644 --- a/docs/Setup.md +++ b/docs/Setup.md @@ -549,12 +549,11 @@ var Hyphenopoly = { ```` The number defines what words are considered "small": -`0`: no words are considered small. - -`1`: insert nbsp after single letter words (e.g. "I" or "a") - -`2`: insert nbsp after single and double letter words - -`3`: insert nbsp after words with 1, 2 or 3 letter +| Number | Effect | +|:------:| ------ | +| `0` | no words are considered small. | +| `1` | insert nbsp after single letter words (e.g. "I" or "a"). | +| `2` | insert nbsp after single and double letter words. | +| `3` | insert nbsp after words with 1, 2 or 3 letters. | and so on...