Skip to content

Commit

Permalink
Hopefully fixed an issue that only appears in CD tests, where the Japanese Sokuon character is treated as a small Tsu.
Browse files Browse the repository at this point in the history
  • Loading branch information
zedseven committed Feb 6, 2021
1 parent 6d2ef79 commit b2db252
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 22 deletions.
21 changes: 13 additions & 8 deletions Romanization/Languages/Japanese/ModifiedHepburn.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
using Romanization.LanguageAgnostic;
using System;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.Globalization;

// ReSharper disable CheckNamespace
// ReSharper disable CommentTypo
Expand Down Expand Up @@ -32,11 +34,14 @@ public sealed class ModifiedHepburn : IRomanizationSystem
private readonly CharSub LongOSub = new CharSub($"o{Choonpu}", Constants.MacronO, false);
private readonly CharSub LongUSub = new CharSub($"u{Choonpu}", Constants.MacronU, false);

private readonly CharSub SyllabicNVowelsSub = new CharSub($"[{SyllabicNHiragana}{SyllabicNKatakana}]([{Constants.LatinVowels}])", "n'${1}");
private readonly CharSub SyllabicNConsonantsSub = new CharSub($"[{SyllabicNHiragana}{SyllabicNKatakana}]([{Constants.LatinConsonants}])", "n${1}");
private readonly CharSub SyllabicNVowelsSub =
new CharSub($"[{SyllabicNHiragana}{SyllabicNKatakana}]([{Constants.LatinVowels}])", "n'${1}");
private readonly CharSub SyllabicNConsonantsSub =
new CharSub($"[{SyllabicNHiragana}{SyllabicNKatakana}]([{Constants.LatinConsonants}])", "n${1}");

private readonly CharSub SokuonGeneralCaseSub = new CharSub($"[{SokuonHiragana}{SokuonKatakana}]([{Constants.LatinConsonants}])", "${1}${1}");
private readonly CharSub SokuonChCaseSub = new CharSub($"[{SokuonHiragana}{SokuonKatakana}]ch", "tch");
private readonly CharSub SokuonGeneralCaseSub =
new CharSub($"[{SokuonHiragana}{SokuonKatakana}]([{Constants.LatinConsonants}])", "${1}${1}", false);
private readonly CharSub SokuonChCaseSub = new CharSub($"[{SokuonHiragana}{SokuonKatakana}]ch", "tch", false);

/// <summary>
/// Instantiates a copy of the system to process romanizations.
Expand Down Expand Up @@ -291,23 +296,23 @@ public ModifiedHepburn()
/// <returns>A romanized version of the text, leaving unrecognized characters untouched. Note that all romanized text will be lowercase.</returns>
[Pure]
public string Process(string text)
// NOTE(review): this span shows two `=>` bodies, paired ReplaceFromChart calls and two closing
// argument lines; they look like the before/after sides of a diff rendered without +/- markers.
// Confirm which side is current before relying on this text.
=> text
// Runs the whole replacement chain inside Utilities.RunWithCulture with the ja-JP culture;
// presumably so the StringComparison.CurrentCulture lookups below use Japanese rules - confirm
// against Utilities.RunWithCulture.
=> Utilities.RunWithCulture(CultureInfo.GetCultureInfo("ja-JP"), () => text
// Replace common alternate characters
.ReplaceCommonAlternates()
// Insert spaces at boundaries between Latin characters and Japanese ones (e.g. ニンテンドーDSiブラウザー)
.SeparateLanguageBoundaries()
// Do multi-char combinations first (Yōon)
.ReplaceFromChart(YoonChart)
.ReplaceFromChart(YoonChart, StringComparison.CurrentCulture)
// Then single-char replacements (Gojūon)
.ReplaceFromChart(GojuonChart)
.ReplaceFromChart(GojuonChart, StringComparison.CurrentCulture)
// Do special substitutions
.ReplaceMany(
// Convert chōonpu usage in original text into macrons to mark long vowels in a romanized manner
LongASub, LongESub, LongISub, LongOSub, LongUSub,
// Render syllabic n as either "n'" or "n" based on whether it precedes a vowel or a consonant, respectively
SyllabicNVowelsSub, SyllabicNConsonantsSub,
// Take sokuon usage into account (repeating the following consonant to mark long consonants);
// the "ch" case is listed before the general case
SokuonChCaseSub, SokuonGeneralCaseSub);
SokuonChCaseSub, SokuonGeneralCaseSub));
}
}
}
28 changes: 14 additions & 14 deletions RomanizationTests/JapaneseTests/ModifiedHepburnTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,24 @@ public void LongVowelTest()
// Checks how Process romanizes syllabic n (ん): the expected strings use a plain "n" in
// "annai" and "gunma", and "n'" in "kan'i" and "shin'you".
// NOTE(review): every assertion appears twice; this looks like the before/after sides of a
// whitespace-only diff rendered without +/- markers - confirm.
[TestMethod]
public void SyllabicNTest()
{
Assert.AreEqual("annai", _system.Process("あんない"));
Assert.AreEqual("gunma", _system.Process("ぐんま"));
Assert.AreEqual("kan'i", _system.Process("かんい"));
Assert.AreEqual("shin'you", _system.Process("しんよう"));
Assert.AreEqual("annai", _system.Process("あんない"));
Assert.AreEqual("gunma", _system.Process("ぐんま"));
Assert.AreEqual("kan'i", _system.Process("かんい"));
Assert.AreEqual("shin'you", _system.Process("しんよう"));
}

// Checks how Process romanizes the sokuon (っ): the expected strings double the following
// consonant (e.g. "kekka", "kippu", "zasshi") and use "tch" where "ch" follows
// ("kotchi", "matcha").
// NOTE(review): every assertion appears twice; this looks like the before/after sides of a
// whitespace-only diff rendered without +/- markers - confirm.
[TestMethod]
public void LongConsonantTest()
{
Assert.AreEqual("kekka", _system.Process("けっか"));
Assert.AreEqual("sassato", _system.Process("さっさと"));
Assert.AreEqual("zutto", _system.Process("ずっと"));
Assert.AreEqual("kippu", _system.Process("きっぷ"));
Assert.AreEqual("zasshi", _system.Process("ざっし"));
Assert.AreEqual("issho", _system.Process("いっしょ"));
Assert.AreEqual("kotchi", _system.Process("こっち"));
Assert.AreEqual("matcha", _system.Process("まっちゃ"));
Assert.AreEqual("mittsu", _system.Process("みっつ"));
Assert.AreEqual("kekka", _system.Process("けっか"));
Assert.AreEqual("sassato", _system.Process("さっさと"));
Assert.AreEqual("zutto", _system.Process("ずっと"));
Assert.AreEqual("kippu", _system.Process("きっぷ"));
Assert.AreEqual("zasshi", _system.Process("ざっし"));
Assert.AreEqual("issho", _system.Process("いっしょ"));
Assert.AreEqual("kotchi", _system.Process("こっち"));
Assert.AreEqual("matcha", _system.Process("まっちゃ"));
Assert.AreEqual("mittsu", _system.Process("みっつ"));
}
}
}
}

0 comments on commit b2db252

Please sign in to comment.