Skip to content

Commit

Permalink
Avoid collapse leading and trailing emphatics (#1227)
Browse files Browse the repository at this point in the history
* Avoid collapse leading and trailing emphatics

* Lint

* Fix typing
  • Loading branch information
khaitruong922 authored Jul 14, 2024
1 parent b07e24c commit ee6185a
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 35 deletions.
45 changes: 28 additions & 17 deletions ext/js/language/ja/japanese.js
Original file line number Diff line number Diff line change
Expand Up @@ -747,29 +747,40 @@ export function isEmphaticCodePoint(codePoint) {
* @returns {string}
*/
export function collapseEmphaticSequences(text, fullCollapse) {
    // Collapses runs of repeated emphatic characters that occur strictly inside `text`.
    // Emphatic characters at the very start or very end of the string are preserved verbatim.
    // - fullCollapse === false: each interior run of one emphatic character is reduced to a single character.
    // - fullCollapse === true: interior emphatic characters are removed entirely.

    // Index of the first non-emphatic character (equals text.length if there is none).
    let left = 0;
    while (left < text.length && isEmphaticCodePoint(/** @type {number} */ (text.codePointAt(left)))) {
        ++left;
    }
    // Index of the last non-emphatic character (equals -1 if there is none).
    let right = text.length - 1;
    while (right >= 0 && isEmphaticCodePoint(/** @type {number} */ (text.codePointAt(right)))) {
        --right;
    }
    // Whole string is emphatic (or empty): there is no interior to collapse.
    if (left > right) {
        return text;
    }

    const leadingEmphatics = text.substring(0, left);
    const trailingEmphatics = text.substring(right + 1);
    let middle = '';
    // Code point of the emphatic run currently being collapsed, or -1 when not inside a run.
    let currentCollapsedCodePoint = -1;

    for (let i = left; i <= right; ++i) {
        const char = text[i];
        const codePoint = /** @type {number} */ (char.codePointAt(0));
        if (isEmphaticCodePoint(codePoint)) {
            if (currentCollapsedCodePoint !== codePoint) {
                // First character of a new run: keep it only when not fully collapsing.
                currentCollapsedCodePoint = codePoint;
                if (!fullCollapse) {
                    middle += char;
                    continue;
                }
            }
            // Repeated character of the current run (or any emphatic when fully collapsing): dropped.
        } else {
            currentCollapsedCodePoint = -1;
            middle += char;
        }
    }

    return leadingEmphatics + middle + trailingEmphatics;
}
51 changes: 33 additions & 18 deletions test/japanese-util.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -767,15 +767,6 @@ describe('Japanese utility functions', () => {
[['かっっっこい', false], 'かっこい'],
[['かっっっこい', true], 'かこい'],

[['こい', false], 'こい'],
[['こい', true], 'こい'],
[['っこい', false], 'っこい'],
[['っこい', true], 'こい'],
[['っっこい', false], 'っこい'],
[['っっこい', true], 'こい'],
[['っっっこい', false], 'っこい'],
[['っっっこい', true], 'こい'],

[['すごい', false], 'すごい'],
[['すごい', true], 'すごい'],
[['すごーい', false], 'すごーい'],
Expand All @@ -787,21 +778,45 @@ describe('Japanese utility functions', () => {
[['すっっごーーい', false], 'すっごーい'],
[['すっっごーーい', true], 'すごい'],

[['こい', false], 'こい'],
[['こい', true], 'こい'],
[['っこい', false], 'っこい'],
[['っこい', true], 'っこい'],
[['っっこい', false], 'っっこい'],
[['っっこい', true], 'っっこい'],
[['っっっこい', false], 'っっっこい'],
[['っっっこい', true], 'っっっこい'],
[['こいっ', false], 'こいっ'],
[['こいっ', true], 'こいっ'],
[['こいっっ', false], 'こいっっ'],
[['こいっっ', true], 'こいっっ'],
[['こいっっっ', false], 'こいっっっ'],
[['こいっっっ', true], 'こいっっっ'],
[['っこいっ', false], 'っこいっ'],
[['っこいっ', true], 'っこいっ'],
[['っっこいっっ', false], 'っっこいっっ'],
[['っっこいっっ', true], 'っっこいっっ'],
[['っっっこいっっっ', false], 'っっっこいっっっ'],
[['っっっこいっっっ', true], 'っっっこいっっっ'],

[['', false], ''],
[['', true], ''],
[['っ', false], 'っ'],
[['っ', true], 'っ'],
[['っっ', false], 'っっ'],
[['っっ', true], 'っ'],
[['っっっ', false], 'っっ'],
[['っっっ', true], 'っ'],
[['っっ', true], 'っっ'],
[['っっっ', false], 'っっっ'],
[['っっっ', true], 'っっっ'],

[['っーッかっこいいっーッ', false], 'っーッかっこいいっーッ'],
[['っーッかっこいいっーッ', true], 'っーッかこいいっーッ'],
[['っっーーッッかっこいいっっーーッッ', false], 'っっーーッッかっこいいっっーーッッ'],
[['っっーーッッかっこいいっっーーッッ', true], 'っっーーッッかこいいっっーーッッ'],

[['かっこいいっ', false], 'かっこいいっ'],
[['かっこいいっ', true], 'かこいいっ'],
[['かっこいいっっ', false], 'かっこいいっっ'],
[['かっこいいっっ', true], 'かこいいっ'],
[['かっこいいっっっ', false], 'かっこいいっっ'],
[['かっこいいっっっ', true], 'かこいいっ'],
[['っーッ', false], 'っーッ'],
[['っーッ', true], 'っーッ'],
[['っっーーッッ', false], 'っっーーッッ'],
[['っっーーッッ', true], 'っっーーッッ'],
];

test.each(data)('%o -> %o', (input, output) => {
Expand Down

0 comments on commit ee6185a

Please sign in to comment.