From e4a49f8ad64c55be9a3d025fcebba3acfe5d0fa6 Mon Sep 17 00:00:00 2001 From: TheOtherOne Date: Sat, 21 Sep 2024 23:34:07 -0400 Subject: [PATCH 1/6] [yi] Initial commit --- ext/js/language/language-descriptors.js | 17 +++ .../yi/yiddish-text-postprocessors.js | 39 +++++++ .../language/yi/yiddish-text-preprocessors.js | 62 ++++++++++ ext/js/language/yi/yiddish-transforms.js | 108 ++++++++++++++++++ test/language/yiddish-transforms.test.js | 44 +++++++ types/ext/language-descriptors.d.ts | 9 ++ 6 files changed, 279 insertions(+) create mode 100644 ext/js/language/yi/yiddish-text-postprocessors.js create mode 100644 ext/js/language/yi/yiddish-text-preprocessors.js create mode 100644 ext/js/language/yi/yiddish-transforms.js create mode 100644 test/language/yiddish-transforms.test.js diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index bdaebd61be..3d1d00b24e 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -40,6 +40,9 @@ import {albanianTransforms} from './sq/albanian-transforms.js'; import {capitalizeFirstLetter, decapitalize, removeAlphabeticDiacritics} from './text-processors.js'; import {tagalogTransforms} from './tl/tagalog-transforms.js'; import {normalizeDiacritics} from './vi/viet-text-preprocessors.js'; +import {convertFinalLetters} from './yi/yiddish-text-postprocessors.js'; +import {convertYiddishLigatures, removeYiddishDiacritics} from './yi/yiddish-text-preprocessors.js'; +import {yiddishTransforms} from './yi/yiddish-transforms.js'; import {isStringPartiallyChinese, normalizePinyin} from './zh/chinese.js'; const capitalizationPreprocessors = { @@ -369,6 +372,20 @@ const languageDescriptors = [ normalizeDiacritics, }, }, + { + iso: 'yi', + iso639_3: 'yid', + name: 'Yiddish', + exampleText: 'באַשאַפֿן', + textPreprocessors: { + convertYiddishLigatures, + removeYiddishDiacritics, + }, + textPostprocessors: { + convertFinalLetters, + }, + languageTransforms: 
yiddishTransforms, + }, { iso: 'yue', iso639_3: 'yue', diff --git a/ext/js/language/yi/yiddish-text-postprocessors.js b/ext/js/language/yi/yiddish-text-postprocessors.js new file mode 100644 index 0000000000..cb23e1c842 --- /dev/null +++ b/ext/js/language/yi/yiddish-text-postprocessors.js @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2024 Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + + +const final_letter_map = new Map([ + ['מ', 'ם'], + ['נ', 'ן'], + ['צ', 'ץ'], + ['פ', 'ף'], + ['כ', 'ך'], +]); + +/* This could probably be optimized with a regular expression and a function call in str.replace instead of a for loop */ +/** @type {import('language').TextProcessor} */ +export const convertFinalLetters = { + name: 'Convert to Final Letters', + description: 'קויף → קויפֿ', + options: [true], + process: (str) => { + if ([...final_letter_map.keys()].includes(str.charAt(str.length - 1))) { + str = str.substring(0, str.length - 1) + final_letter_map.get(str.substring(str.length - 1)); + } + return str; + }, +}; diff --git a/ext/js/language/yi/yiddish-text-preprocessors.js b/ext/js/language/yi/yiddish-text-preprocessors.js new file mode 100644 index 0000000000..9f2e9a1e6e --- /dev/null +++ b/ext/js/language/yi/yiddish-text-preprocessors.js @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2024 Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under 
the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +import {basicTextProcessorOptions} from '../text-processors.js'; + +/* Unicode NFKC does not break apart Yiddish ligatures */ + +const ligatures = [ + {lig: '\u05f0', split: '\u05d5' + '\u05d5'}, // װ -> וו + {lig: '\u05f1', split: '\u05d5' + '\u05d9'}, // ױ -> וי + {lig: '\u05f2', split: '\u05d9' + '\u05d9'}, // ײ -> יי + {lig: '\ufb1d', split: '\u05d9' + '\u05b4'}, // יִ -> יִ + {lig: '\ufb1f', split: '\u05d9' + '\u05d9' + '\u05b7'}, // ײַ -> ייַ +]; + +/* This could probably be optimized with a regular expression and a function call in str.replace instead of a for loop */ +/** @type {import('language').BidirectionalConversionPreprocessor} */ +export const convertYiddishLigatures = { + name: 'Split Ligatures', + description: 'וו → װ', + options: ['off', 'direct', 'inverse'], + process: (str, setting) => { + switch (setting) { + case 'off': + return str; + case 'direct': + for (const ligature of ligatures) { + str = str.replace(ligature.lig, ligature.split); + } + return str; + case 'inverse': + for (const ligature of ligatures) { + str = str.replace(ligature.split, ligature.lig); + } + return str; + } + }, +}; + +/** @type {import('language').TextProcessor} */ +export const removeYiddishDiacritics = { + name: 'Remove Diacritics', + description: 'פאת → פֿאָתּ', + options: basicTextProcessorOptions, + process: (str, setting) => { + return setting ? 
str.replace(/[\u05B0-\u05C7]/g, '') : str; + }, +}; diff --git a/ext/js/language/yi/yiddish-transforms.js b/ext/js/language/yi/yiddish-transforms.js new file mode 100644 index 0000000000..a014a083e6 --- /dev/null +++ b/ext/js/language/yi/yiddish-transforms.js @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2024 Yomitan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +import {suffixInflection} from '../language-transforms.js'; + +/** @typedef {keyof typeof conditions} Condition */ + +/* +TODO + Nouns + inflectionalSuffixes + -s (plural) + -es (plural) + -n (plural) + -en (plural) + -t (plural) + -ekh (plural) + -er (comparative) + Verbs + Past + -t ge- +n + -n ge- +n + converbs + Present + inf (dictionary form) + 1st p. s. +n + 2nd p. s. -st +n + 2nd question form -stu +n + 3rd p. s. -t +n + 1st p. pl. (dict form) + 2nd p. pl -t +n + 3rd p. 
pl (dict form) + Future + (dict form) + Converbs + Adjectives + -er + -e +*/ + +const conditions = { + v: { + name: 'Verb', + isDictionaryForm: true, + }, + n: { + name: 'Noun', + isDictionaryForm: true, + subConditions: ['np', 'ns'], + }, + np: { + name: 'Noun plural', + isDictionaryForm: false, + }, + ns: { + name: 'Noun singular', + isDictionaryForm: true, + }, + adj: { + name: 'Adjective', + isDictionaryForm: true, + }, + adv: { + name: 'Adverb', + isDictionaryForm: true, + }, +}; + +/** @type {import('language-transformer').LanguageTransformDescriptor} */ +export const yiddishTransforms = { + language: 'yi', + conditions, + transforms: { + plural: { + name: 'plural', + description: 'plural form of a noun', + rules: [ + suffixInflection('ס', '', ['np'], ['ns']), + suffixInflection('ן', '', ['np'], ['ns']), + suffixInflection('ער', '', ['np'], ['ns']), + suffixInflection('ים', '', ['np'], ['ns']), + ], + }, + diminutive: { + name: 'diminutive', + description: 'diminutive form of a noun', + rules: [ + suffixInflection('לעך', '', ['n'], ['n']), + suffixInflection('טשיק', '', ['n'], ['n']), + suffixInflection('קע', '', ['n'], ['n']), + ], + }, + }, +}; diff --git a/test/language/yiddish-transforms.test.js b/test/language/yiddish-transforms.test.js new file mode 100644 index 0000000000..e57e75912a --- /dev/null +++ b/test/language/yiddish-transforms.test.js @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2023-2024 Yomitan Authors + * Copyright (C) 2020-2022 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +import {LanguageTransformer} from '../../ext/js/language/language-transformer.js'; +import {yiddishTransforms} from '../../ext/js/language/yi/yiddish-transforms.js'; +import {testLanguageTransformer} from '../fixtures/language-transformer-test.js'; + + +const tests = [ + { + category: 'nouns', + valid: true, + tests: [ + {term: 'גרופּע', source: 'גרופּעס', rule: 'ns', reasons: ['plural']}, + {term: 'טיש', source: 'טישן', rule: 'ns', reasons: ['plural']}, + {term: 'פּויער', source: 'פּויערים', rule: 'ns', reasons: ['plural']}, + {term: 'קינד', source: 'קינדער', rule: 'ns', reasons: ['plural']}, + {term: 'קינדער', source: 'קינדערלעך', rule: 'n', reasons: ['diminutive']}, + {term: 'עטיקעט', source: 'עטיקעטקע', rule: 'n', reasons: ['diminutive']}, + {term: 'עטיקעט', source: 'עטיקעטקע', rule: 'n', reasons: ['diminutive']}, + {term: 'קליענטעל', source: 'קליענטעלטשיק', rule: 'n', reasons: ['diminutive']}, + ], + }, +]; + + +const languageTransformer = new LanguageTransformer(); +languageTransformer.addDescriptor(yiddishTransforms); +testLanguageTransformer(languageTransformer, tests); diff --git a/types/ext/language-descriptors.d.ts b/types/ext/language-descriptors.d.ts index 62643e0c75..d7a522f471 100644 --- a/types/ext/language-descriptors.d.ts +++ b/types/ext/language-descriptors.d.ts @@ -200,6 +200,15 @@ type AllTextProcessors = { normalizeDiacritics: TextProcessor<'old' | 'new' | 'off'>; }; }; + yi: { + pre: { + removeYiddishDiacritics: TextProcessor; + convertYiddishLigatures: BidirectionalConversionPreprocessor; + }; + post: { + convertFinalLetters: TextProcessor; + }; + }; yue: Record; zh: Record; }; From fe1068e9e4145a018c028af8607486fb4ec2e751 Mon Sep 17 00:00:00 2001 From: TheOtherOne Date: Tue, 24 Sep 2024 13:27:30 -0400 Subject: [PATCH 2/6] [yi] Add umlaut demutation for 
diminutives and plurals --- ext/js/language/language-descriptors.js | 7 +-- .../yi/yiddish-text-postprocessors.js | 49 +++++++++++++++--- .../language/yi/yiddish-text-preprocessors.js | 39 +++++---------- ext/js/language/yi/yiddish-transforms.js | 50 ++++++++++++++++++- test/language/yiddish-transforms.test.js | 4 ++ types/ext/language-descriptors.d.ts | 3 +- 6 files changed, 113 insertions(+), 39 deletions(-) diff --git a/ext/js/language/language-descriptors.js b/ext/js/language/language-descriptors.js index 3d1d00b24e..e876da99d6 100644 --- a/ext/js/language/language-descriptors.js +++ b/ext/js/language/language-descriptors.js @@ -40,8 +40,8 @@ import {albanianTransforms} from './sq/albanian-transforms.js'; import {capitalizeFirstLetter, decapitalize, removeAlphabeticDiacritics} from './text-processors.js'; import {tagalogTransforms} from './tl/tagalog-transforms.js'; import {normalizeDiacritics} from './vi/viet-text-preprocessors.js'; -import {convertFinalLetters} from './yi/yiddish-text-postprocessors.js'; -import {convertYiddishLigatures, removeYiddishDiacritics} from './yi/yiddish-text-preprocessors.js'; +import {convertFinalLetters, convertYiddishLigatures} from './yi/yiddish-text-postprocessors.js'; +import {combineYiddishLigatures, removeYiddishDiacritics} from './yi/yiddish-text-preprocessors.js'; import {yiddishTransforms} from './yi/yiddish-transforms.js'; import {isStringPartiallyChinese, normalizePinyin} from './zh/chinese.js'; @@ -378,11 +378,12 @@ const languageDescriptors = [ name: 'Yiddish', exampleText: 'באַשאַפֿן', textPreprocessors: { - convertYiddishLigatures, removeYiddishDiacritics, + combineYiddishLigatures, }, textPostprocessors: { convertFinalLetters, + convertYiddishLigatures, }, languageTransforms: yiddishTransforms, }, diff --git a/ext/js/language/yi/yiddish-text-postprocessors.js b/ext/js/language/yi/yiddish-text-postprocessors.js index cb23e1c842..d082be7ebb 100644 --- a/ext/js/language/yi/yiddish-text-postprocessors.js +++ 
b/ext/js/language/yi/yiddish-text-postprocessors.js @@ -17,23 +17,56 @@ const final_letter_map = new Map([ - ['מ', 'ם'], - ['נ', 'ן'], - ['צ', 'ץ'], - ['פ', 'ף'], - ['כ', 'ך'], + ['\u05de', '\u05dd'], // מ to ם + ['\u05e0', '\u05df'], // נ to ן + ['\u05e6', '\u05e5'], // צ to ץ + ['\u05e4', '\u05e3'], // פ to ף + ['\u05dB', '\u05da'], // כ to ך ]); -/* This could probably be optimized with a regular expression and a function call in str.replace instead of a for loop */ +const ligatures = [ + {lig: '\u05f0', split: '\u05d5' + '\u05d5'}, // װ -> וו + {lig: '\u05f1', split: '\u05d5' + '\u05d9'}, // ױ -> וי + {lig: '\u05f2', split: '\u05d9' + '\u05d9'}, // ײ -> יי + {lig: '\ufb1d', split: '\u05d9' + '\u05b4'}, // יִ -> יִ + {lig: '\ufb1f', split: '\u05d9' + '\u05d9' + '\u05b7'}, // ײַ -> ייַ + {lig: '\ufb2e', split: '\u05d0' + '\u05b7'}, // Pasekh alef + {lig: '\ufb2f', split: '\u05d0' + '\u05b8'}, // Komets alef +]; + /** @type {import('language').TextProcessor} */ export const convertFinalLetters = { name: 'Convert to Final Letters', description: 'קויף → קויפֿ', options: [true], process: (str) => { - if ([...final_letter_map.keys()].includes(str.charAt(str.length - 1))) { - str = str.substring(0, str.length - 1) + final_letter_map.get(str.substring(str.length - 1)); + const len = str.length - 1; + if ([...final_letter_map.keys()].includes(str.charAt(len))) { + str = str.substring(0, len) + final_letter_map.get(str.substring(len)); } return str; }, }; + +/** @type {import('language').BidirectionalConversionPreprocessor} */ +export const convertYiddishLigatures = { + name: 'Split Ligatures', + description: 'וו → װ', + options: ['off', 'direct', 'inverse'], + process: (str, setting) => { + switch (setting) { + case 'off': + return str; + case 'direct': + for (const ligature of ligatures) { + str = str.replace(ligature.lig, ligature.split); + } + return str; + case 'inverse': + for (const ligature of ligatures) { + str = str.replace(ligature.split, ligature.lig); + } + 
return str; + } + }, +}; diff --git a/ext/js/language/yi/yiddish-text-preprocessors.js b/ext/js/language/yi/yiddish-text-preprocessors.js index 9f2e9a1e6e..8c9684d630 100644 --- a/ext/js/language/yi/yiddish-text-preprocessors.js +++ b/ext/js/language/yi/yiddish-text-preprocessors.js @@ -15,39 +15,26 @@ * along with this program. If not, see . */ -import {basicTextProcessorOptions} from '../text-processors.js'; - -/* Unicode NFKC does not break apart Yiddish ligatures */ - const ligatures = [ {lig: '\u05f0', split: '\u05d5' + '\u05d5'}, // װ -> וו {lig: '\u05f1', split: '\u05d5' + '\u05d9'}, // ױ -> וי {lig: '\u05f2', split: '\u05d9' + '\u05d9'}, // ײ -> יי {lig: '\ufb1d', split: '\u05d9' + '\u05b4'}, // יִ -> יִ {lig: '\ufb1f', split: '\u05d9' + '\u05d9' + '\u05b7'}, // ײַ -> ייַ + {lig: '\ufb2e', split: '\u05d0' + '\u05b7'}, // Pasekh alef + {lig: '\ufb2f', split: '\u05d0' + '\u05b8'}, // Komets alef ]; -/* This could probably be optimized with a regular expression and a function call in str.replace instead of a for loop */ -/** @type {import('language').BidirectionalConversionPreprocessor} */ -export const convertYiddishLigatures = { - name: 'Split Ligatures', +/** @type {import('language').TextProcessor} */ +export const combineYiddishLigatures = { + name: 'Combine Ligatures', description: 'וו → װ', - options: ['off', 'direct', 'inverse'], - process: (str, setting) => { - switch (setting) { - case 'off': - return str; - case 'direct': - for (const ligature of ligatures) { - str = str.replace(ligature.lig, ligature.split); - } - return str; - case 'inverse': - for (const ligature of ligatures) { - str = str.replace(ligature.split, ligature.lig); - } - return str; + options: [true], + process: (str) => { + for (const ligature of ligatures) { + str = str.replace(ligature.split, ligature.lig); } + return str; }, }; @@ -55,8 +42,8 @@ export const convertYiddishLigatures = { export const removeYiddishDiacritics = { name: 'Remove Diacritics', description: 'פאת → 
פֿאָתּ', - options: basicTextProcessorOptions, - process: (str, setting) => { - return setting ? str.replace(/[\u05B0-\u05C7]/g, '') : str; + options: [true], + process: (str) => { + return str.replace(/[\u05B0-\u05C7]/g, ''); }, }; diff --git a/ext/js/language/yi/yiddish-transforms.js b/ext/js/language/yi/yiddish-transforms.js index a014a083e6..514f70909d 100644 --- a/ext/js/language/yi/yiddish-transforms.js +++ b/ext/js/language/yi/yiddish-transforms.js @@ -52,6 +52,44 @@ TODO -e */ +const umlautTable = new Map([ + ['\u05e2', '\u05d0'], // Ayin to Shtumer alef + ['\u05f2', '\u05f1'], // Tsvey yudn to Vov yud + ['\u05d9', '\u05d5'], // Yud to Vov +]); + +/** + * @param {string} str + * @returns {string} + */ +function umlautMutation(str) { + const match = (/[עװאאַױוײיִײַיאָ](?!.*[װאאַױוײיִײַיאָע])/).exec(str); + if (match !== null && [...umlautTable.keys()].includes(str.charAt(match.index))) { + str = str.substring(0, match.index) + umlautTable.get(str.charAt(match.index)) + str.substring(match.index + 1); + } + return str; +} + +/** + * @template {string} TCondition + * @param {string} inflectedSuffix + * @param {string} deinflectedSuffix + * @param {TCondition[]} conditionsIn + * @param {TCondition[]} conditionsOut + * @returns {import('language-transformer').SuffixRule} + */ +function umlautMutationSuffixInflection(inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut) { + const suffixRegExp = new RegExp(inflectedSuffix + '$'); + return { + type: 'suffix', + isInflected: suffixRegExp, + deinflected: deinflectedSuffix, + deinflect: (text) => umlautMutation(text.slice(0, -inflectedSuffix.length)) + deinflectedSuffix, + conditionsIn, + conditionsOut, + }; +} + const conditions = { v: { name: 'Verb', @@ -91,8 +129,16 @@ export const yiddishTransforms = { rules: [ suffixInflection('ס', '', ['np'], ['ns']), suffixInflection('ן', '', ['np'], ['ns']), - suffixInflection('ער', '', ['np'], ['ns']), suffixInflection('ים', '', ['np'], ['ns']), + 
suffixInflection('ער', '', ['np'], ['ns']), + ], + }, + umlaut_plural: { + name: 'umlaut_plural', + description: 'plural form of a umlaut noun', + rules: [ + umlautMutationSuffixInflection('ער', '', ['np'], ['ns']), + umlautMutationSuffixInflection('לעך', '', ['np'], ['ns']), ], }, diminutive: { @@ -102,6 +148,8 @@ export const yiddishTransforms = { suffixInflection('לעך', '', ['n'], ['n']), suffixInflection('טשיק', '', ['n'], ['n']), suffixInflection('קע', '', ['n'], ['n']), + umlautMutationSuffixInflection('ל', '', ['n'], ['n']), + umlautMutationSuffixInflection('עלע', '', ['n'], ['n']), ], }, }, diff --git a/test/language/yiddish-transforms.test.js b/test/language/yiddish-transforms.test.js index e57e75912a..d633c83019 100644 --- a/test/language/yiddish-transforms.test.js +++ b/test/language/yiddish-transforms.test.js @@ -34,6 +34,10 @@ const tests = [ {term: 'עטיקעט', source: 'עטיקעטקע', rule: 'n', reasons: ['diminutive']}, {term: 'עטיקעט', source: 'עטיקעטקע', rule: 'n', reasons: ['diminutive']}, {term: 'קליענטעל', source: 'קליענטעלטשיק', rule: 'n', reasons: ['diminutive']}, + {term: 'קאצ', source: 'קעצעלע', rule: 'n', reasons: ['diminutive']}, + {term: 'קאצ', source: 'קעצל', rule: 'n', reasons: ['diminutive']}, + {term: 'מױד', source: 'מײדלעך', rule: 'ns', reasons: ['umlaut_plural']}, + {term: 'מאנ', source: 'מענער', rule: 'ns', reasons: ['umlaut_plural']}, ], }, ]; diff --git a/types/ext/language-descriptors.d.ts b/types/ext/language-descriptors.d.ts index d7a522f471..4768e5f777 100644 --- a/types/ext/language-descriptors.d.ts +++ b/types/ext/language-descriptors.d.ts @@ -202,11 +202,12 @@ type AllTextProcessors = { }; yi: { pre: { + combineYiddishLigatures: TextProcessor; removeYiddishDiacritics: TextProcessor; - convertYiddishLigatures: BidirectionalConversionPreprocessor; }; post: { convertFinalLetters: TextProcessor; + convertYiddishLigatures: BidirectionalConversionPreprocessor; }; }; yue: Record; From c9347a1fe2c859edcc49445b1faaa55f597ddd78 Mon Sep 17 
00:00:00 2001 From: TheOtherOne Date: Mon, 4 Nov 2024 22:59:51 -0500 Subject: [PATCH 3/6] [yi] Add verb mutation code --- ext/js/language/yi/yiddish-transforms.js | 89 +++++++++++------------- test/language/yiddish-transforms.test.js | 8 ++- 2 files changed, 48 insertions(+), 49 deletions(-) diff --git a/ext/js/language/yi/yiddish-transforms.js b/ext/js/language/yi/yiddish-transforms.js index 514f70909d..cd59483f37 100644 --- a/ext/js/language/yi/yiddish-transforms.js +++ b/ext/js/language/yi/yiddish-transforms.js @@ -19,39 +19,6 @@ import {suffixInflection} from '../language-transforms.js'; /** @typedef {keyof typeof conditions} Condition */ -/* -TODO - Nouns - inflectionalSuffixes - -s (plural) - -es (plural) - -n (plural) - -en (plural) - -t (plural) - -ekh (plural) - -er (comparative) - Verbs - Past - -t ge- +n - -n ge- +n - converbs - Present - inf (dictionary form) - 1st p. s. +n - 2nd p. s. -st +n - 2nd question form -stu +n - 3rd p. s. -t +n - 1st p. pl. (dict form) - 2nd p. pl -t +n - 3rd p. 
pl (dict form) - Future - (dict form) - Converbs - Adjectives - -er - -e -*/ - const umlautTable = new Map([ ['\u05e2', '\u05d0'], // Ayin to Shtumer alef ['\u05f2', '\u05f1'], // Tsvey yudn to Vov yud @@ -63,7 +30,7 @@ const umlautTable = new Map([ * @returns {string} */ function umlautMutation(str) { - const match = (/[עװאאַױוײיִײַיאָ](?!.*[װאאַױוײיִײַיאָע])/).exec(str); + const match = (/[\u05E2\u05F0\u05D0\uFB2E\u05F1\u05D5\u05F2\uFB1D\uFB1F\u05D9\uFB2F](?!.*[\u05E2\u05F0\u05D0\uFB2E\u05F1\u05D5\u05F2\uFB1D\uFB1F\u05D9\uFB2F])/).exec(str); if (match !== null && [...umlautTable.keys()].includes(str.charAt(match.index))) { str = str.substring(0, match.index) + umlautTable.get(str.charAt(match.index)) + str.substring(match.index + 1); } @@ -94,6 +61,15 @@ const conditions = { v: { name: 'Verb', isDictionaryForm: true, + subConditions: ['vpast', 'vpresent'], + }, + vpast: { + name: 'Verb, past tense', + isDictionaryForm: false, + }, + vpresent: { + name: 'Verb, present tense', + isDictionaryForm: true, }, n: { name: 'Noun', @@ -101,11 +77,11 @@ const conditions = { subConditions: ['np', 'ns'], }, np: { - name: 'Noun plural', - isDictionaryForm: false, + name: 'Noun, plural', + isDictionaryForm: true, }, ns: { - name: 'Noun singular', + name: 'Noun, singular', isDictionaryForm: true, }, adj: { @@ -127,29 +103,46 @@ export const yiddishTransforms = { name: 'plural', description: 'plural form of a noun', rules: [ - suffixInflection('ס', '', ['np'], ['ns']), - suffixInflection('ן', '', ['np'], ['ns']), - suffixInflection('ים', '', ['np'], ['ns']), - suffixInflection('ער', '', ['np'], ['ns']), + suffixInflection('\u05E1', '', ['np'], ['ns']), // -s + suffixInflection('\u05DF', '', ['np'], ['ns']), // -n + suffixInflection('\u05D9\u05DD', '', ['np'], ['ns']), // -im + suffixInflection('\u05E2\u05E8', '', ['np'], ['ns']), // -er ], }, umlaut_plural: { name: 'umlaut_plural', description: 'plural form of a umlaut noun', rules: [ - umlautMutationSuffixInflection('ער', '', 
['np'], ['ns']), - umlautMutationSuffixInflection('לעך', '', ['np'], ['ns']), + umlautMutationSuffixInflection('\u05E2\u05E8', '', ['np'], ['ns']), // -er + umlautMutationSuffixInflection('\u05DC\u05E2\u05DA', '', ['np'], ['ns']), // -lekh ], }, diminutive: { name: 'diminutive', description: 'diminutive form of a noun', rules: [ - suffixInflection('לעך', '', ['n'], ['n']), - suffixInflection('טשיק', '', ['n'], ['n']), - suffixInflection('קע', '', ['n'], ['n']), - umlautMutationSuffixInflection('ל', '', ['n'], ['n']), - umlautMutationSuffixInflection('עלע', '', ['n'], ['n']), + suffixInflection('\u05DC\u05E2\u05DA', '', ['n'], ['n']), // -lekh + suffixInflection('\u05D8\u05E9\u05D9\u05E7', '', ['n'], ['n']), // -tshik + suffixInflection('\u05E7\u05E2', '', ['n'], ['n']), // -ke + umlautMutationSuffixInflection('\u05DC', '', ['n'], ['n']), // -l + umlautMutationSuffixInflection('\u05E2\u05DC\u05E2', '', ['n'], ['n']), // -ele + ], + }, + verb_present_singular_to_first_person: { + name: 'verb_present_singular_to_first_person', + description: 'Turn the second and third person singular form to first person', + rules: [ + suffixInflection('\u05E1\u05D8', '', ['v'], ['vpresent']), // -st + suffixInflection('\u05D8', '', ['v'], ['vpresent']), // -t + suffixInflection('\u05E0\u05D3\u05D9\u05E7', '', ['v'], ['vpresent']), // -ndik + ], + }, + verb_present_plural_to_first_person: { + name: 'verb_present_plural_to_first_person', + description: 'Turn the second plural form to first person plural form', + rules: [ + suffixInflection('\u05D8\u05E1', '\u05E0', ['v'], ['vpresent']), // -ts + suffixInflection('\u05D8', '\u05E0', ['v'], ['vpresent']), // -t ], }, }, diff --git a/test/language/yiddish-transforms.test.js b/test/language/yiddish-transforms.test.js index d633c83019..b8a5f93ad5 100644 --- a/test/language/yiddish-transforms.test.js +++ b/test/language/yiddish-transforms.test.js @@ -20,7 +20,8 @@ import {LanguageTransformer} from 
'../../ext/js/language/language-transformer.js import {yiddishTransforms} from '../../ext/js/language/yi/yiddish-transforms.js'; import {testLanguageTransformer} from '../fixtures/language-transformer-test.js'; - +/* Since Yiddish final letters are handled in a text postprocessor after all the transformations have been run, test cases must never use the final form of a letter! +Otherwise, it will fail even if the rule is correct! */ const tests = [ { category: 'nouns', @@ -38,6 +39,11 @@ const tests = [ {term: 'קאצ', source: 'קעצל', rule: 'n', reasons: ['diminutive']}, {term: 'מױד', source: 'מײדלעך', rule: 'ns', reasons: ['umlaut_plural']}, {term: 'מאנ', source: 'מענער', rule: 'ns', reasons: ['umlaut_plural']}, + {term: 'קויפֿ', source: 'קויפֿסט', rule: 'v', reasons: ['verb_present_singular_to_first_person']}, + {term: 'קויפֿ', source: 'קויפֿט', rule: 'vpresent', reasons: ['verb_present_singular_to_first_person']}, + {term: 'קויפֿנ', source: 'קויפֿט', rule: 'vpresent', reasons: ['verb_present_plural_to_first_person']}, + {term: 'קויפֿנ', source: 'קויפֿטס', rule: 'vpresent', reasons: ['verb_present_plural_to_first_person']}, + {term: 'קויפֿנ', source: 'קויפֿטס', rule: 'vpresent', reasons: ['verb_present_plural_to_first_person']}, ], }, ]; From 58e3debb21fbc23ded7721212d7cee025f9e8401 Mon Sep 17 00:00:00 2001 From: ThatsItForTheOtherOne Date: Sat, 30 Nov 2024 01:42:09 +0000 Subject: [PATCH 4/6] [yi] Add missing plural forms, separate diminutive from diminutive with umlaut --- ext/js/language/yi/yiddish-transforms.js | 22 +++++++++-- test/language/yiddish-transforms.test.js | 47 +++++++++++++++++------- 2 files changed, 51 insertions(+), 18 deletions(-) diff --git a/ext/js/language/yi/yiddish-transforms.js b/ext/js/language/yi/yiddish-transforms.js index cd59483f37..3f414da42b 100644 --- a/ext/js/language/yi/yiddish-transforms.js +++ b/ext/js/language/yi/yiddish-transforms.js @@ -78,7 +78,7 @@ const conditions = { }, np: { name: 'Noun, plural', - isDictionaryForm: 
true, + isDictionaryForm: false, }, ns: { name: 'Noun, singular', @@ -105,8 +105,12 @@ export const yiddishTransforms = { rules: [ suffixInflection('\u05E1', '', ['np'], ['ns']), // -s suffixInflection('\u05DF', '', ['np'], ['ns']), // -n - suffixInflection('\u05D9\u05DD', '', ['np'], ['ns']), // -im + suffixInflection('\u05D9\u05DD', '', ['np'], ['ns']), // -im, hebrew suffixInflection('\u05E2\u05E8', '', ['np'], ['ns']), // -er + suffixInflection('\u05E2\u05DA', '', ['np'], ['ns']), // -ekh + suffixInflection('\u05E2\u05DF', '', ['np'], ['ns']), // -en + suffixInflection('\u05E2\u05E1', '', ['np'], ['ns']), // -es + suffixInflection('\u05D5\u05EA', '', ['np'], ['ns']), // -ot, hebrew ], }, umlaut_plural: { @@ -114,16 +118,26 @@ export const yiddishTransforms = { description: 'plural form of a umlaut noun', rules: [ umlautMutationSuffixInflection('\u05E2\u05E8', '', ['np'], ['ns']), // -er - umlautMutationSuffixInflection('\u05DC\u05E2\u05DA', '', ['np'], ['ns']), // -lekh + umlautMutationSuffixInflection('\u05E2\u05E1', '', ['np'], ['ns']), // -es + umlautMutationSuffixInflection('\u05D9\u05DD', '', ['np'], ['ns']), // -im + umlautMutationSuffixInflection('\u05E2\u05DF', '', ['np'], ['ns']), // -en + umlautMutationSuffixInflection('\u05DF', '', ['np'], ['ns']), // -n ], }, diminutive: { name: 'diminutive', description: 'diminutive form of a noun', rules: [ - suffixInflection('\u05DC\u05E2\u05DA', '', ['n'], ['n']), // -lekh suffixInflection('\u05D8\u05E9\u05D9\u05E7', '', ['n'], ['n']), // -tshik suffixInflection('\u05E7\u05E2', '', ['n'], ['n']), // -ke + suffixInflection('\u05DC', '', ['n'], ['n']), // -l + suffixInflection('\u05E2\u05DC\u05E2', '', ['n'], ['n']), // -ele + ], + }, + diminutive_and_umlaut: { + name: 'diminutive_and_umlaut', + description: 'diminutive form of a noun with stem umlaut', + rules: [ umlautMutationSuffixInflection('\u05DC', '', ['n'], ['n']), // -l umlautMutationSuffixInflection('\u05E2\u05DC\u05E2', '', ['n'], ['n']), // -ele ], 
diff --git a/test/language/yiddish-transforms.test.js b/test/language/yiddish-transforms.test.js index b8a5f93ad5..13c170ac46 100644 --- a/test/language/yiddish-transforms.test.js +++ b/test/language/yiddish-transforms.test.js @@ -20,25 +20,44 @@ import {LanguageTransformer} from '../../ext/js/language/language-transformer.js import {yiddishTransforms} from '../../ext/js/language/yi/yiddish-transforms.js'; import {testLanguageTransformer} from '../fixtures/language-transformer-test.js'; -/* Since Yiddish final letters are handled in a text postprocessor after all the transformations have been run, test cases must never use the final form of a letter! -Otherwise, it will fail even if the rule is correct! */ +/* Since Yiddish final letters are handled in a text postprocessor after all the transformations have been run, test case terms must never use the final form of a letter! +Otherwise, it will fail even if the rule is correct! Sources require use of final letters however for plural deinflection */ const tests = [ { category: 'nouns', valid: true, tests: [ - {term: 'גרופּע', source: 'גרופּעס', rule: 'ns', reasons: ['plural']}, - {term: 'טיש', source: 'טישן', rule: 'ns', reasons: ['plural']}, - {term: 'פּויער', source: 'פּויערים', rule: 'ns', reasons: ['plural']}, - {term: 'קינד', source: 'קינדער', rule: 'ns', reasons: ['plural']}, - {term: 'קינדער', source: 'קינדערלעך', rule: 'n', reasons: ['diminutive']}, - {term: 'עטיקעט', source: 'עטיקעטקע', rule: 'n', reasons: ['diminutive']}, - {term: 'עטיקעט', source: 'עטיקעטקע', rule: 'n', reasons: ['diminutive']}, - {term: 'קליענטעל', source: 'קליענטעלטשיק', rule: 'n', reasons: ['diminutive']}, - {term: 'קאצ', source: 'קעצעלע', rule: 'n', reasons: ['diminutive']}, - {term: 'קאצ', source: 'קעצל', rule: 'n', reasons: ['diminutive']}, - {term: 'מױד', source: 'מײדלעך', rule: 'ns', reasons: ['umlaut_plural']}, - {term: 'מאנ', source: 'מענער', rule: 'ns', reasons: ['umlaut_plural']}, + {term: 'גרופּע', source: 'גרופּעס', rule: 
'ns', reasons: ['plural']}, // grupes -> grupe + {term: 'טיש', source: 'טישן', rule: 'ns', reasons: ['plural']}, // tishn -> tish + {term: 'פּויער', source: 'פּויערים', rule: 'ns', reasons: ['plural']}, // poyerim -> poyer + {term: 'קינד', source: 'קינדער', rule: 'ns', reasons: ['plural']}, // kinder -> kind + {term: 'בענקל', source: 'בענקלעך', rule: 'ns', reasons: ['plural']}, // benklekh -> benkl + {term: 'באַנ', source: 'באַנען', rule: 'ns', reasons: ['plural']}, // banen -> ban + {term: 'נודניק', source: 'נודניקעס', rule: 'ns', reasons: ['plural']}, // nudnikes -> nudnik + {term: 'חלומ', source: 'חלומות', rule: 'ns', reasons: ['plural']}, // khlomos -> khlom + {term: 'עטיקעט', source: 'עטיקעטקע', rule: 'n', reasons: ['diminutive']}, // etiketke -> etiket + {term: 'קליענטעל', source: 'קליענטעלטשיק', rule: 'n', reasons: ['diminutive']}, // klienteltshik -> klientel + {term: 'קינדער', source: 'קינדערלעך', rule: 'ns', reasons: ['diminutive', 'plural']}, // kinderlekh -> kinder + ], + }, + { + category: 'umlaut_nouns', + valid: true, + tests: [ + {term: 'מאנ', source: 'מענער', rule: 'ns', reasons: ['umlaut_plural']}, // mener -> man + {term: 'טשוואק', source: 'טשוועקעס', rule: 'ns', reasons: ['umlaut_plural']}, // tshvekes -> tshvak + {term: 'קאצ', source: 'קעצעלע', rule: 'n', reasons: ['diminutive_and_umlaut']}, // ketzele -> katz + {term: 'קאצ', source: 'קעצל', rule: 'n', reasons: ['diminutive_and_umlaut']}, // ketzl -> katz + {term: 'מױד', source: 'מײדלעך', rule: 'ns', reasons: ['diminutive_and_umlaut', 'plural']}, // moyd -> meydlekh + {term: 'יסור', source: 'יסורים', rule: 'ns', reasons:['umlaut_plural']}, // yesurim -> yesur (not actually a word lol) + {term: 'בלומ', source: 'בלימען', rule: 'ns', reasons:['umlaut_plural']}, // blimen -> blum + {term: 'אומשטאנד', source: 'אומשטענדן', rule: 'ns', reasons:['umlaut_plural']}, // umshtendn -> umshtand + ], + }, + { + category: 'verbs', + valid: true, + tests: [ {term: 'קויפֿ', source: 'קויפֿסט', rule: 'v', reasons:
['verb_present_singular_to_first_person']}, {term: 'קויפֿ', source: 'קויפֿט', rule: 'vpresent', reasons: ['verb_present_singular_to_first_person']}, {term: 'קויפֿנ', source: 'קויפֿט', rule: 'vpresent', reasons: ['verb_present_plural_to_first_person']}, From e2fc2bc15f6109b537d7ef715c807f65bbcbee20 Mon Sep 17 00:00:00 2001 From: ThatsItForTheOtherOne Date: Sat, 30 Nov 2024 18:36:38 +0000 Subject: [PATCH 5/6] [yi] Modify umlaut demutation to allow demutation of ayin to pasekh alef and komets alef, and demutation of vov yud to ayin --- ext/js/language/yi/yiddish-transforms.js | 67 +++++++++++------------- test/language/yiddish-transforms.test.js | 8 +-- 2 files changed, 35 insertions(+), 40 deletions(-) diff --git a/ext/js/language/yi/yiddish-transforms.js b/ext/js/language/yi/yiddish-transforms.js index 3f414da42b..8eaa1a35d6 100644 --- a/ext/js/language/yi/yiddish-transforms.js +++ b/ext/js/language/yi/yiddish-transforms.js @@ -19,42 +19,37 @@ import {suffixInflection} from '../language-transforms.js'; /** @typedef {keyof typeof conditions} Condition */ -const umlautTable = new Map([ - ['\u05e2', '\u05d0'], // Ayin to Shtumer alef - ['\u05f2', '\u05f1'], // Tsvey yudn to Vov yud - ['\u05d9', '\u05d5'], // Yud to Vov -]); +const mutations = [ + {new: '\u05e2', orig: '\ufb2e'}, // Ayin to pasekh alef + {new: '\u05e2', orig: '\ufb2f'}, // Ayin to komets alef + {new: '\u05e2', orig: '\u05D0'}, // Ayin to shtumer alef + {new: '\u05f1', orig: '\u05e2'}, // Vov yud to ayin + {new: '\u05f2', orig: '\u05f1'}, // Tsvey yudn to Vov yud + {new: '\u05d9', orig: '\u05d5'}, // Yud to Vov +]; /** - * @param {string} str - * @returns {string} - */ -function umlautMutation(str) { - const match = (/[\u05E2\u05F0\u05D0\uFB2E\u05F1\u05D5\u05F2\uFB1D\uFB1F\u05D9\uFB2F](?!.*[\u05E2\u05F0\u05D0\uFB2E\u05F1\u05D5\u05F2\uFB1D\uFB1F\u05D9\uFB2F])/).exec(str); - if (match !== null && [...umlautTable.keys()].includes(str.charAt(match.index))) { - str = str.substring(0, match.index) +
umlautTable.get(str.charAt(match.index)) + str.substring(match.index + 1); - } - return str; -} - -/** - * @template {string} TCondition * @param {string} inflectedSuffix * @param {string} deinflectedSuffix - * @param {TCondition[]} conditionsIn - * @param {TCondition[]} conditionsOut - * @returns {import('language-transformer').SuffixRule} + * @param {Condition[]} conditionsIn + * @param {Condition[]} conditionsOut + * @returns {import('language-transformer').SuffixRule[]} */ function umlautMutationSuffixInflection(inflectedSuffix, deinflectedSuffix, conditionsIn, conditionsOut) { const suffixRegExp = new RegExp(inflectedSuffix + '$'); - return { - type: 'suffix', - isInflected: suffixRegExp, - deinflected: deinflectedSuffix, - deinflect: (text) => umlautMutation(text.slice(0, -inflectedSuffix.length)) + deinflectedSuffix, - conditionsIn, - conditionsOut, - }; + return mutations.map((mutation) => ( + { + type: 'suffix', + isInflected: suffixRegExp, + deinflected: deinflectedSuffix, + deinflect: (/** @type {string} */ text) => { + const match = new RegExp(/[\u05E2\u05F0\u05D0\uFB2E\u05F1\u05D5\u05F2\uFB1D\uFB1F\u05D9\uFB2F](?!.*[\u05E2\u05F0\u05D0\uFB2E\u05F1\u05D5\u05F2\uFB1D\uFB1F\u05D9\uFB2F])/).exec(text.slice(0, -inflectedSuffix.length)); + return (match?.[0] !== mutation.new) ? 
'' : text.slice(0, match.index) + mutation.orig + text.slice(match.index + 1, -inflectedSuffix.length) + deinflectedSuffix; + }, + conditionsIn, + conditionsOut, + } + )); } const conditions = { @@ -117,11 +112,11 @@ export const yiddishTransforms = { name: 'umlaut_plural', description: 'plural form of a umlaut noun', rules: [ - umlautMutationSuffixInflection('\u05E2\u05E8', '', ['np'], ['ns']), // -er - umlautMutationSuffixInflection('\u05E2\u05E1', '', ['np'], ['ns']), // -es - umlautMutationSuffixInflection('\u05D9\u05DD', '', ['np'], ['ns']), // -im - umlautMutationSuffixInflection('\u05E2\u05DF', '', ['np'], ['ns']), // -en - umlautMutationSuffixInflection('\u05DF', '', ['np'], ['ns']), // -n + ...umlautMutationSuffixInflection('\u05E2\u05E8', '', ['np'], ['ns']), // -er + ...umlautMutationSuffixInflection('\u05E2\u05E1', '', ['np'], ['ns']), // -es + ...umlautMutationSuffixInflection('\u05D9\u05DD', '', ['np'], ['ns']), // -im + ...umlautMutationSuffixInflection('\u05E2\u05DF', '', ['np'], ['ns']), // -en + ...umlautMutationSuffixInflection('\u05DF', '', ['np'], ['ns']), // -n ], }, diminutive: { @@ -138,8 +133,8 @@ export const yiddishTransforms = { name: 'diminutive_and_umlaut', description: 'diminutive form of a noun with stem umlaut', rules: [ - umlautMutationSuffixInflection('\u05DC', '', ['n'], ['n']), // -l - umlautMutationSuffixInflection('\u05E2\u05DC\u05E2', '', ['n'], ['n']), // -ele + ...umlautMutationSuffixInflection('\u05DC', '', ['n'], ['n']), // -l + ...umlautMutationSuffixInflection('\u05E2\u05DC\u05E2', '', ['n'], ['n']), // -ele ], }, verb_present_singular_to_first_person: { diff --git a/test/language/yiddish-transforms.test.js b/test/language/yiddish-transforms.test.js index 13c170ac46..5aad6afa36 100644 --- a/test/language/yiddish-transforms.test.js +++ b/test/language/yiddish-transforms.test.js @@ -46,12 +46,12 @@ const tests = [ tests: [ {term: 'מאנ', source: 'מענער', rule: 'ns', reasons: ['umlaut_plural']}, // mener -> man {term: 
'טשוואק', source: 'טשוועקעס', rule: 'ns', reasons: ['umlaut_plural']}, // tshvekes -> tshvak + {term: 'מױד', source: 'מײדלעך', rule: 'ns', reasons: ['diminutive_and_umlaut', 'plural']}, // meydlekh -> moyd + {term: 'דאָקטער', source: 'דאָקטױרים', rule: 'ns', reasons: ['umlaut_plural']}, // doktoyrim -> dokter + {term: 'בלומ', source: 'בלימען', rule: 'ns', reasons: ['umlaut_plural']}, // blimen -> blum + {term: 'אומשטאנד', source: 'אומשטענדן', rule: 'ns', reasons: ['umlaut_plural']}, // umshtendn -> umshtand {term: 'קאצ', source: 'קעצעלע', rule: 'n', reasons: ['diminutive_and_umlaut']}, // ketzele -> katz {term: 'קאצ', source: 'קעצל', rule: 'n', reasons: ['diminutive_and_umlaut']}, // ketzl -> katz - {term: 'מױד', source: 'מײדלעך', rule: 'ns', reasons: ['diminutive_and_umlaut', 'plural']}, // moyd -> meydlekh - {term: 'יסור', source: 'יסורים', rule: 'ns', reasons:['umlaut_plural']}, // yesurim -> yesur (not actually a word lol) - {term: 'בלומ', source: 'בלימען', rule: 'ns', reasons:['umlaut_plural']}, // blimen -> blum - {term: 'אומשטאנד', source: 'אומשטענדן', rule: 'ns', reasons:['umlaut_plural']}, // umshtendn -> umshtand ], }, { From 86b12a8dd72c2b9b5317f710ea9fe26ab3881f18 Mon Sep 17 00:00:00 2001 From: ThatsItForTheOtherOne Date: Mon, 30 Dec 2024 01:05:44 +0000 Subject: [PATCH 6/6] [yi] Add missing plural forms from Israeli-Haredi yiddish --- ext/js/language/yi/yiddish-transforms.js | 9 +++++++++ test/language/yiddish-transforms.test.js | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/ext/js/language/yi/yiddish-transforms.js b/ext/js/language/yi/yiddish-transforms.js index 8eaa1a35d6..70f2b71364 100644 --- a/ext/js/language/yi/yiddish-transforms.js +++ b/ext/js/language/yi/yiddish-transforms.js @@ -106,6 +106,12 @@ export const yiddishTransforms = { suffixInflection('\u05E2\u05DF', '', ['np'], ['ns']), // -en suffixInflection('\u05E2\u05E1', '', ['np'], ['ns']), // -es suffixInflection('\u05D5\u05EA', '', ['np'], ['ns']), // -ot, hebrew + 
suffixInflection('\u05E0\u05E1', '', ['np'], ['ns']), // -ns + suffixInflection('\u05E2\u05E8\u05E2\u05DF', '', ['np'], ['ns']), // -eren + suffixInflection('\u05E2\u05E0\u05E2\u05E1', '', ['np'], ['ns']), // -enes + suffixInflection('\u05E2\u05E0\u05E1', '', ['np'], ['ns']), // -ens + suffixInflection('\u05E2\u05E8\u05E1', '', ['np'], ['ns']), // -ers + suffixInflection('\u05E1\u05E2\u05E8', '', ['np'], ['ns']), // -ser ], }, umlaut_plural: { @@ -117,6 +123,9 @@ export const yiddishTransforms = { ...umlautMutationSuffixInflection('\u05D9\u05DD', '', ['np'], ['ns']), // -im ...umlautMutationSuffixInflection('\u05E2\u05DF', '', ['np'], ['ns']), // -en ...umlautMutationSuffixInflection('\u05DF', '', ['np'], ['ns']), // -n + ...umlautMutationSuffixInflection('\u05E1', '', ['np'], ['ns']), // -s + ...umlautMutationSuffixInflection('\u05E2\u05DA', '', ['np'], ['ns']), // -ekh + ...umlautMutationSuffixInflection('\u05E2\u05E8\u05E1', '', ['np'], ['ns']), // -ers ], }, diminutive: { diff --git a/test/language/yiddish-transforms.test.js b/test/language/yiddish-transforms.test.js index 5aad6afa36..f2372d60d8 100644 --- a/test/language/yiddish-transforms.test.js +++ b/test/language/yiddish-transforms.test.js @@ -38,6 +38,12 @@ const tests = [ {term: 'עטיקעט', source: 'עטיקעטקע', rule: 'n', reasons: ['diminutive']}, // etiketke -> etiket {term: 'קליענטעל', source: 'קליענטעלטשיק', rule: 'n', reasons: ['diminutive']}, // klienteltshik -> klientel {term: 'קינדער', source: 'קינדערלעך', rule: 'ns', reasons: ['diminutive', 'plural']}, // kinderlekh -> kinder + {term: 'ליפ', source: 'ליפענעס', rule: 'ns', reasons: ['plural']}, // lipenes -> lip + {term: 'אײ', source: 'אײערען', rule: 'ns', reasons: ['plural']}, // eyeren -> ey + {term: 'זאָק', source: 'זאָקענס', rule: 'ns', reasons: ['plural']}, // zokens -> zok + {term: 'בוך', source: 'בוךערס', rule: 'ns', reasons: ['plural']}, // bukhers -> bukh + {term: 'קוכן', source: 'קוכןסער', rule: 'ns', reasons: ['plural']}, // kukhnser -> 
kukh + {term: 'קעניג', source: 'קעניגנס', rule: 'ns', reasons: ['plural']}, // kenigns -> kenig ], }, { @@ -52,6 +58,10 @@ const tests = [ {term: 'אומשטאנד', source: 'אומשטענדן', rule: 'ns', reasons: ['umlaut_plural']}, // umshtendn -> umshtand {term: 'קאצ', source: 'קעצעלע', rule: 'n', reasons: ['diminutive_and_umlaut']}, // ketzele -> katz {term: 'קאצ', source: 'קעצל', rule: 'n', reasons: ['diminutive_and_umlaut']}, // ketzl -> katz + {term: 'באַרג', source: 'בערגן', rule: 'ns', reasons: ['umlaut_plural']}, // bergn -> barg + {term: 'בױמ', source: 'בײמערס', rule: 'ns', reasons: ['umlaut_plural']}, // beymers -> boim + {term: 'קאפּ', source: 'קעפּער', rule: 'ns', reasons: ['umlaut_plural']}, // keper -> kop + {term: 'קאפּ', source: 'קעפּער', rule: 'ns', reasons: ['umlaut_plural']}, // kep -> kop ], }, {