Skip to content

Commit

Permalink
Simplify string normalization using modern JS functions
Browse files Browse the repository at this point in the history
Simplify the utility for normalizing strings now that
`String.prototype.normalize` and `\p` escapes are widely available.
  • Loading branch information
robertknight committed Nov 22, 2024
1 parent 1870fa8 commit afa68e9
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 89 deletions.
18 changes: 5 additions & 13 deletions h/static/scripts/controllers/search-bar-controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import escapeHtml from 'escape-html';
import { Controller } from '../base/controller';
import { cloneTemplate } from '../util/dom';
import { getLozengeValues, shouldLozengify } from '../util/search-text-parser';
import * as stringUtil from '../util/string';
import { stripMarks } from '../util/string';

import { AutosuggestDropdownController } from './autosuggest-dropdown-controller';
import { LozengeController } from './lozenge-controller';
Expand All @@ -13,14 +13,6 @@ const TAG_TYPE = 'TAG';
const GROUP_TYPE = 'GROUP';
const MAX_SUGGESTIONS = 5;

/**
* Normalize a string for use in comparisons of user input with a suggestion.
* This causes differences in unicode composition and combining characters/accents to be ignored.
*/
const normalizeStr = function (str) {
return stringUtil.fold(stringUtil.normalize(str));
};

/**
* Controller for the search bar.
*/
Expand Down Expand Up @@ -85,7 +77,7 @@ export class SearchBarController extends Controller {
return Object.assign(item, {
type: TAG_TYPE,
title: item.tag, // make safe
matchOn: normalizeStr(item.tag),
matchOn: stripMarks(item.tag),
usageCount: item.count || 0,
});
});
Expand Down Expand Up @@ -113,7 +105,7 @@ export class SearchBarController extends Controller {
return Object.assign(item, {
type: GROUP_TYPE,
title: item.name, // make safe
matchOn: normalizeStr(item.name),
matchOn: stripMarks(item.name),
pubid: item.pubid,
name: item.name,
relationship: item.relationship,
Expand Down Expand Up @@ -161,7 +153,7 @@ export class SearchBarController extends Controller {
groupVal = groupVal.slice(0, -1);
}

const matchVal = normalizeStr(groupVal).toLowerCase();
const matchVal = stripMarks(groupVal).toLowerCase();

// NOTE: We are pushing a pubid to lowercase here. These ids are created by us
// in a random generation case-sensitive style. Theoretically, that means
Expand Down Expand Up @@ -364,7 +356,7 @@ export class SearchBarController extends Controller {
typeFilter = GROUP_TYPE;
}

let inputFilter = normalizeStr(currentInput);
let inputFilter = stripMarks(currentInput);

if (typeFilter === TAG_TYPE || typeFilter === GROUP_TYPE) {
inputFilter = inputFilter.substr(inputFilter.indexOf(':') + 1);
Expand Down
28 changes: 14 additions & 14 deletions h/static/scripts/tests/util/string-test.js
Original file line number Diff line number Diff line change
@@ -1,58 +1,58 @@
import * as stringUtil from '../../util/string';
import { hyphenate, unhyphenate, stripMarks } from '../../util/string';

describe('util/string', () => {
describe('hyphenate', () => {
it('converts input to kebab-case', () => {
assert.equal(stringUtil.hyphenate('fooBar'), 'foo-bar');
assert.equal(stringUtil.hyphenate('FooBar'), '-foo-bar');
assert.equal(hyphenate('fooBar'), 'foo-bar');
assert.equal(hyphenate('FooBar'), '-foo-bar');
});
});

describe('unhyphenate', () => {
it('converts input to camelCase', () => {
assert.equal(stringUtil.unhyphenate('foo-bar'), 'fooBar');
assert.equal(stringUtil.unhyphenate('foo-bar-'), 'fooBar');
assert.equal(stringUtil.unhyphenate('foo-bar-baz'), 'fooBarBaz');
assert.equal(stringUtil.unhyphenate('-foo-bar-baz'), 'FooBarBaz');
assert.equal(unhyphenate('foo-bar'), 'fooBar');
assert.equal(unhyphenate('foo-bar-'), 'fooBar');
assert.equal(unhyphenate('foo-bar-baz'), 'fooBarBaz');
assert.equal(unhyphenate('-foo-bar-baz'), 'FooBarBaz');
});
});

describe('stringUtil helpers', () => {
describe('stripAccents', () => {
it('removes hungarian marks', () => {
const text = 'Fürge rőt róka túlszökik zsíros étkű kutyán';
const decoded = stringUtil.fold(stringUtil.normalize(text));
const decoded = stripMarks(text);
const expected = 'Furge rot roka tulszokik zsiros etku kutyan';

assert.equal(decoded, expected);
});

it('removes greek marks', () => {
const text = 'Καλημέρα κόσμε';
const decoded = stringUtil.fold(stringUtil.normalize(text));
const decoded = stripMarks(text);
const expected = 'Καλημερα κοσμε';

assert.equal(decoded, expected);
});

it('removes japanese marks', () => {
const text = 'カタカナコンバータ';
const decoded = stringUtil.fold(stringUtil.normalize(text));
const decoded = stripMarks(text);
const expected = 'カタカナコンハータ';

assert.equal(decoded, expected);
});

it('removes marathi marks', () => {
const text = 'काचं शक्नोम्यत्तुम';
const decoded = stringUtil.fold(stringUtil.normalize(text));
const decoded = stripMarks(text);
const expected = 'कच शकनमयततम';

assert.equal(decoded, expected);
});

it('removes thai marks', () => {
const text = 'ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ';
const decoded = stringUtil.fold(stringUtil.normalize(text));
const decoded = stripMarks(text);
const expected = 'ฉนกนกระจกได แตมนไมทาใหฉนเจบ';

assert.equal(decoded, expected);
Expand All @@ -61,7 +61,7 @@ describe('util/string', () => {
it('removes all marks', () => {
const text =
'̀ ́ ̂ ̃ ̄ ̅ ̆ ̇ ̈ ̉ ̊ ̋ ̌ ̍ ̎ ̏ ̐ ̑ ̒ ̓ ̔ ̕ ̖ ̗ ̘ ̙ ̚ ̛ ̜ ̝ ̞ ̟ ̠ ̡ ̢ ̣ ̤ ̥ ̦ ̧ ̨ ̩ ̪ ̫ ̬ ̭ ̮ ̯ ̰ ̱ ̲ ̳ ̴ ̵ ̶ ̷ ̸ ̹ ̺ ̻ ̼ ̽ ̾ ̿ ̀ ́ ͂ ̓ ̈́ ͅ ͠ ͡"';
const decoded = stringUtil.fold(stringUtil.normalize(text));
const decoded = stripMarks(text);
const expected =
' "';

Expand Down
62 changes: 0 additions & 62 deletions h/static/scripts/util/string.js

This file was deleted.

28 changes: 28 additions & 0 deletions h/static/scripts/util/string.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/**
 * Turn a `camelCase` or `CapitalCase` identifier into `kebab-case`.
 *
 * A hyphen is inserted in front of every ASCII uppercase letter and the
 * whole string is then lower-cased. A leading capital therefore yields a
 * leading hyphen: `FooBar` becomes `-foo-bar`.
 *
 * @param name - The identifier to convert.
 * @returns The hyphenated, lower-cased form of `name`.
 */
export function hyphenate(name: string) {
  // Prefix each capital with "-" first; casing is handled in one pass below.
  const withHyphens = name.replace(/([A-Z])/g, '-$1');
  return withHyphens.toLowerCase();
}

/**
 * Turn a `kebab-case` string into `camelCase`.
 *
 * Each hyphen is removed and the character that follows it is upper-cased.
 * A trailing hyphen is simply dropped, and a leading hyphen capitalizes the
 * first letter (`-foo-bar` becomes `FooBar`).
 *
 * @param name - The hyphenated string to convert.
 * @returns `name` with every hyphen removed and the following character
 *   upper-cased.
 */
export function unhyphenate(name: string) {
  let result = name;
  // Always rescan from the start: when a hyphen is followed by another
  // hyphen, the second one stays in the string and is handled on the next
  // pass.
  for (
    let hyphenAt = result.indexOf('-');
    hyphenAt !== -1;
    hyphenAt = result.indexOf('-')
  ) {
    // `charAt` yields '' past the end of the string, covering a trailing hyphen.
    const upper = result.charAt(hyphenAt + 1).toUpperCase();
    result = result.slice(0, hyphenAt) + upper + result.slice(hyphenAt + 2);
  }
  return result;
}

/**
 * Remove accents and other combining marks from a string.
 *
 * The input is first put into Unicode NFKD form, which splits precomposed
 * characters into a base character plus combining marks (and also applies
 * compatibility mappings, e.g. ligatures are expanded). Every code point in
 * the Unicode "Mark" general category is then deleted.
 *
 * Used to normalize strings before search so that differences in accents
 * and in Unicode composition are ignored.
 *
 * @param str - The text to normalize.
 * @returns The NFKD-normalized text with all mark characters removed.
 */
export function stripMarks(str: string) {
  const decomposed = str.normalize('NFKD');
  return decomposed.replace(/\p{M}/gu, '');
}

0 comments on commit afa68e9

Please sign in to comment.