Skip to content

Commit

Permalink
add language select, abstract text transformations (#584)
Browse files Browse the repository at this point in the history
* Copy functions from JapaneseUtil

* Remove JapaneseUtil

* Update usages of JapaneseUtil functions

* part1

* frontend done?

* fix tests

* offscreen and type complications

* add tests

* start fixing tests

* keep fixing tests

* fix tests

* Copy functions from JapaneseUtil

* Remove JapaneseUtil

* Update usages of JapaneseUtil functions

* delete pt

* renames

* add tests

* kebab-case filenames

* lint

* minor fixes

* merge

* fixes

* fix part of comments

* fix more comments

* delete unused types

* comment

* comment

* do backend

* other files

* move fetch utils to own file

* remove extra line

* add extra line

* remove unnecessary export

* simplify folder structure

* remove redundant async

* fix param type in api

* fix language index

* undo changes to cssStyleApplier

* undo changes to utilities.js

* undo changes to utilities.js

* simplify language util

* lint

* undo phantom changes to anki integration

* require textTransformations options

* explicit locale in localeCompare

* punctuate notes

* prefer early exit

* rename LanguageOptionsObjectMap

* rename to textPreprocessor

* tuple with names instead of boolean array

* safe data setting

* optional chaining

* simplify LanguageOptions

* encapsulate languages

* delete language util

* nullable language in text preprocessors controller

* rename transform to process

* remove settings

* make translation advanced again

* remove unused getTextTransformations api call

* comments

* change language types

* RIP flags

* comments

* fix tests

* lint

* Text preprocessor type changes (FooSoft#10)

* Add types

* Update types

* Simplify type check

* Refactor typing and structuring of language definitions

* lint

* update translator benchmark

* undo markdown changes

* undo markdown changes

* undo markdown changes

* more merge

* simplify language controller

---------

Co-authored-by: toasted-nutbread <[email protected]>
Co-authored-by: Darius Jahandarie <[email protected]>
  • Loading branch information
3 people authored Feb 17, 2024
1 parent 9432e4b commit e24b2ae
Show file tree
Hide file tree
Showing 32 changed files with 1,648 additions and 387 deletions.
4 changes: 4 additions & 0 deletions .eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -605,9 +605,13 @@
"ext/js/general/object-property-accessor.js",
"ext/js/general/regex-util.js",
"ext/js/general/text-source-map.js",
"ext/js/language/en/language-english.js",
"ext/js/language/ja/japanese-wanakana.js",
"ext/js/language/ja/japanese.js",
"ext/js/language/ja/language-japanese.js",
"ext/js/language/language-transformer.js",
"ext/js/language/languages.js",
"ext/js/language/text-preprocessors.js",
"ext/js/language/translator.js",
"ext/js/media/audio-downloader.js",
"ext/js/media/media-util.js",
Expand Down
15 changes: 3 additions & 12 deletions benches/translator.bench.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ import {fileURLToPath} from 'node:url';
import path from 'path';
import {bench, describe} from 'vitest';
import {parseJson} from '../dev/json.js';
import {createFindKanjiOptions, createFindTermsOptions} from '../test/utilities/translator.js';
import {createTranslatorContext} from '../test/fixtures/translator-test.js';
import {createFindKanjiOptions, createFindTermsOptions} from '../test/utilities/translator.js';

const dirname = path.dirname(fileURLToPath(import.meta.url));
const dictionaryName = 'Test Dictionary 2';
Expand All @@ -33,25 +33,16 @@ describe('Translator', () => {
const {optionsPresets, tests} = parseJson(readFileSync(testInputsFilePath, {encoding: 'utf8'}));

const findKanjiTests = tests.filter((data) => data.options === 'kanji');
const findTermTests = tests.filter((data) => data.options === 'default');
const findTermWithTextTransformationsTests = tests.filter((data) => data.options !== 'kanji' && data.options !== 'default');
const findTermTests = tests.filter((data) => data.options !== 'kanji');

bench(`Translator.prototype.findTerms - no text transformations (n=${findTermTests.length})`, async () => {
bench(`Translator.prototype.findTerms - (n=${findTermTests.length})`, async () => {
for (const data of /** @type {import('test/translator').TestInputFindTerm[]} */ (findTermTests)) {
const {mode, text} = data;
const options = createFindTermsOptions(dictionaryName, optionsPresets, data.options);
await translator.findTerms(mode, text, options);
}
});

bench(`Translator.prototype.findTerms - text transformations (n=${findTermWithTextTransformationsTests.length})`, async () => {
for (const data of /** @type {import('test/translator').TestInputFindTerm[]} */ (findTermWithTextTransformationsTests)) {
const {mode, text} = data;
const options = createFindTermsOptions(dictionaryName, optionsPresets, data.options);
await translator.findTerms(mode, text, options);
}
});

bench(`Translator.prototype.findKanji - (n=${findKanjiTests.length})`, async () => {
for (const data of /** @type {import('test/translator').TestInputFindKanji[]} */ (findKanjiTests)) {
const {text} = data;
Expand Down
3 changes: 3 additions & 0 deletions dev/jsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
"error": ["../types/ext/error"],
"event-listener-collection": ["../types/ext/event-listener-collection"],
"japanese-util": ["../types/ext/japanese-util"],
"language": ["../types/ext/language"],
"language-english": ["../types/ext/language-english"],
"language-japanese": ["../types/ext/language-japanese"],
"ext/json-schema": ["../types/ext/json-schema"],
"language-transformer": ["../types/ext/language-transformer"],
"language-transformer-internal": ["../types/ext/language-transformer-internal"],
Expand Down
120 changes: 60 additions & 60 deletions docs/anki-integration.md

Large diffs are not rendered by default.

41 changes: 5 additions & 36 deletions ext/data/schemas/options-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
"type": "object",
"required": [
"enable",
"language",
"resultOutputMode",
"debugInfo",
"maxResults",
Expand Down Expand Up @@ -126,6 +127,10 @@
"type": "boolean",
"default": true
},
"language": {
"type": "string",
"default": "ja"
},
"resultOutputMode": {
"type": "string",
"enum": ["group", "merge", "split"],
Expand Down Expand Up @@ -722,12 +727,6 @@
"translation": {
"type": "object",
"required": [
"convertHalfWidthCharacters",
"convertNumericCharacters",
"convertAlphabeticCharacters",
"convertHiraganaToKatakana",
"convertKatakanaToHiragana",
"collapseEmphaticSequences",
"textReplacements",
"searchResolution"
],
Expand All @@ -740,36 +739,6 @@
],
"default": "letter"
},
"convertHalfWidthCharacters": {
"type": "string",
"enum": ["false", "true", "variant"],
"default": "false"
},
"convertNumericCharacters": {
"type": "string",
"enum": ["false", "true", "variant"],
"default": "false"
},
"convertAlphabeticCharacters": {
"type": "string",
"enum": ["false", "true", "variant"],
"default": "false"
},
"convertHiraganaToKatakana": {
"type": "string",
"enum": ["false", "true", "variant"],
"default": "false"
},
"convertKatakanaToHiragana": {
"type": "string",
"enum": ["false", "true", "variant"],
"default": "variant"
},
"collapseEmphaticSequences": {
"type": "string",
"enum": ["false", "true", "full"],
"default": "false"
},
"textReplacements": {
"type": "object",
"required": [
Expand Down
26 changes: 11 additions & 15 deletions ext/js/background/backend.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import {DictionaryDatabase} from '../dictionary/dictionary-database.js';
import {Environment} from '../extension/environment.js';
import {ObjectPropertyAccessor} from '../general/object-property-accessor.js';
import {distributeFuriganaInflected, isCodePointJapanese, isStringPartiallyJapanese, convertKatakanaToHiragana as jpConvertKatakanaToHiragana} from '../language/ja/japanese.js';
import {getLanguageSummaries} from '../language/languages.js';
import {Translator} from '../language/translator.js';
import {AudioDownloader} from '../media/audio-downloader.js';
import {getFileExtensionFromAudioMediaType, getFileExtensionFromImageMediaType} from '../media/media-util.js';
Expand Down Expand Up @@ -183,7 +184,8 @@ export class Backend {
['textHasJapaneseCharacters', this._onApiTextHasJapaneseCharacters.bind(this)],
['getTermFrequencies', this._onApiGetTermFrequencies.bind(this)],
['findAnkiNotes', this._onApiFindAnkiNotes.bind(this)],
['openCrossFramePort', this._onApiOpenCrossFramePort.bind(this)]
['openCrossFramePort', this._onApiOpenCrossFramePort.bind(this)],
['getLanguageSummaries', this._onApiGetLanguageSummaries.bind(this)]
]);
/* eslint-enable @stylistic/no-multi-spaces */

Expand Down Expand Up @@ -906,6 +908,11 @@ export class Backend {
return {targetTabId, targetFrameId};
}

/**
 * API handler registered under the `getLanguageSummaries` action.
 * Takes no parameters and returns whatever `getLanguageSummaries()` (imported from
 * `../language/languages.js`) returns.
 * @type {import('api').ApiHandler<'getLanguageSummaries'>}
 */
_onApiGetLanguageSummaries() {
return getLanguageSummaries();
}

// Command handlers

/**
Expand Down Expand Up @@ -2361,15 +2368,9 @@ export class Backend {
if (typeof deinflect !== 'boolean') { deinflect = true; }
const enabledDictionaryMap = this._getTranslatorEnabledDictionaryMap(options);
const {
general: {mainDictionary, sortFrequencyDictionary, sortFrequencyDictionaryOrder},
general: {mainDictionary, sortFrequencyDictionary, sortFrequencyDictionaryOrder, language},
scanning: {alphanumeric},
translation: {
convertHalfWidthCharacters,
convertNumericCharacters,
convertAlphabeticCharacters,
convertHiraganaToKatakana,
convertKatakanaToHiragana,
collapseEmphaticSequences,
textReplacements: textReplacementsOptions,
searchResolution
}
Expand All @@ -2394,16 +2395,11 @@ export class Backend {
sortFrequencyDictionary,
sortFrequencyDictionaryOrder,
removeNonJapaneseCharacters: !alphanumeric,
convertHalfWidthCharacters,
convertNumericCharacters,
convertAlphabeticCharacters,
convertHiraganaToKatakana,
convertKatakanaToHiragana,
collapseEmphaticSequences,
searchResolution,
textReplacements,
enabledDictionaryMap,
excludeDictionaryDefinitions
excludeDictionaryDefinitions,
language
};
}

Expand Down
7 changes: 7 additions & 0 deletions ext/js/comm/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,13 @@ export class API {
return this._invoke('openCrossFramePort', {targetTabId, targetFrameId});
}

/**
 * Invokes the `getLanguageSummaries` API action.
 * The action takes no parameters, so `void 0` (undefined) is passed as the params argument.
 * @returns {Promise<import('api').ApiReturn<'getLanguageSummaries'>>}
 */
getLanguageSummaries() {
return this._invoke('getLanguageSummaries', void 0);
}

// Utilities

/**
Expand Down
28 changes: 27 additions & 1 deletion ext/js/data/options-util.js
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,8 @@ export class OptionsUtil {
this._updateVersion22,
this._updateVersion23,
this._updateVersion24,
this._updateVersion25
this._updateVersion25,
this._updateVersion26
];
/* eslint-enable @typescript-eslint/unbound-method */
if (typeof targetVersion === 'number' && targetVersion < result.length) {
Expand Down Expand Up @@ -1154,6 +1155,31 @@ export class OptionsUtil {
}
}

/**
* - Added general.language.
* - Modularized text preprocessors.
* @type {import('options-util').UpdateFunction}
*/
_updateVersion26(options) {
const textPreprocessors = [
'convertHalfWidthCharacters',
'convertNumericCharacters',
'convertAlphabeticCharacters',
'convertHiraganaToKatakana',
'convertKatakanaToHiragana',
'collapseEmphaticSequences'
];

for (const {options: profileOptions} of options.profiles) {
profileOptions.general.language = 'ja';

for (const preprocessor of textPreprocessors) {
delete profileOptions.translation[preprocessor];
}
}
}


/**
* @param {string} url
* @returns {Promise<chrome.tabs.Tab>}
Expand Down
29 changes: 29 additions & 0 deletions ext/js/language/en/language-english.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* Copyright (C) 2024 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import {capitalizeFirstLetter, decapitalize} from '../text-preprocessors.js';

// Text preprocessors offered for English; key order is kept as declared.
const textPreprocessors = {capitalizeFirstLetter, decapitalize};

/**
 * Language descriptor for English: display name, ISO code, example text,
 * and the text preprocessors imported from `../text-preprocessors.js`.
 * @type {import('language-english').EnglishLanguageDescriptor}
 */
export const descriptor = {
    name: 'English',
    iso: 'en',
    exampleText: 'read',
    textPreprocessors
};
77 changes: 77 additions & 0 deletions ext/js/language/ja/language-japanese.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Copyright (C) 2024 Yomitan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

import {basicTextPreprocessorOptions} from '../text-preprocessors.js';
import {convertAlphabeticToKana} from './japanese-wanakana.js';
import {collapseEmphaticSequences, convertHalfWidthKanaToFullWidth, convertHiraganaToKatakana, convertKatakanaToHiragana, convertNumericToFullWidth} from './japanese.js';

/**
 * Language descriptor for Japanese.
 *
 * `textPreprocessors` maps each preprocessor key to:
 * - `name`: the display name,
 * - `description`: a short "before → after" example,
 * - `options`: the list of selectable setting values,
 * - `process`: a function receiving the text, the selected setting, and a source map.
 *
 * Every `process` function returns the input string unchanged when its setting is disabled.
 * @type {import('language-japanese').JapaneseLanguageDescriptor}
 */
export const descriptor = {
    name: 'Japanese',
    iso: 'ja',
    exampleText: '読め',
    textPreprocessors: {
        convertHalfWidthCharacters: {
            name: 'Convert half width characters to full width',
            // The "before" side must be half-width katakana. It is written with escapes so that
            // Unicode compatibility normalization applied by tooling cannot fold it into the
            // full-width form and make both sides of the example identical.
            description: '\uFF96\uFF90\uFF81\uFF6C\uFF9D → ヨミチャン',
            options: basicTextPreprocessorOptions,
            /** @type {import('language').TextPreprocessorFunction<boolean>} */
            process: (str, setting, sourceMap) => (setting ? convertHalfWidthKanaToFullWidth(str, sourceMap) : str)
        },
        convertNumericCharacters: {
            name: 'Convert numeric characters to full width',
            // The "after" side must be full-width digits (U+FF11..U+FF14); escaped for the same
            // reason as the half-width example above.
            description: '1234 → \uFF11\uFF12\uFF13\uFF14',
            options: basicTextPreprocessorOptions,
            /** @type {import('language').TextPreprocessorFunction<boolean>} */
            process: (str, setting) => (setting ? convertNumericToFullWidth(str) : str)
        },
        convertAlphabeticCharacters: {
            name: 'Convert alphabetic characters to hiragana',
            description: 'yomichan → よみちゃん',
            options: basicTextPreprocessorOptions,
            /** @type {import('language').TextPreprocessorFunction<boolean>} */
            process: (str, setting, sourceMap) => (setting ? convertAlphabeticToKana(str, sourceMap) : str)
        },
        convertHiraganaToKatakana: {
            name: 'Convert hiragana to katakana',
            description: 'よみちゃん → ヨミチャン',
            options: basicTextPreprocessorOptions,
            /** @type {import('language').TextPreprocessorFunction<boolean>} */
            process: (str, setting) => (setting ? convertHiraganaToKatakana(str) : str)
        },
        convertKatakanaToHiragana: {
            name: 'Convert katakana to hiragana',
            description: 'ヨミチャン → よみちゃん',
            options: basicTextPreprocessorOptions,
            /** @type {import('language').TextPreprocessorFunction<boolean>} */
            process: (str, setting) => (setting ? convertKatakanaToHiragana(str) : str)
        },
        collapseEmphaticSequences: {
            name: 'Collapse emphatic character sequences',
            description: 'すっっごーーい → すっごーい / すごい',
            // Each option is a [collapseEmphatic, collapseEmphaticFull] pair.
            options: [[false, false], [true, false], [true, true]],
            /** @type {import('language').TextPreprocessorFunction<[collapseEmphatic: boolean, collapseEmphaticFull: boolean]>} */
            process: (str, setting, sourceMap) => {
                const [collapseEmphatic, collapseEmphaticFull] = setting;
                if (collapseEmphatic) {
                    str = collapseEmphaticSequences(str, collapseEmphaticFull, sourceMap);
                }
                return str;
            }
        }
    }
};
Loading

0 comments on commit e24b2ae

Please sign in to comment.