From cd90387eaa7670b7183faeffd66715f4dc3292ac Mon Sep 17 00:00:00 2001 From: Zacchary Dempsey-Plante Date: Sun, 31 Jan 2021 17:26:47 -0500 Subject: [PATCH] Added a ton of Russian systems (no single standard :/) and reworked the way some of the processing is done. --- Documentation/articles/supported.md | 70 +++++++- README.md | 4 +- Romanization/IRomanizationSystem.cs | 8 + Romanization/LanguageAgnostic.cs | 157 ---------------- Romanization/LanguageAgnostic/CharSub.cs | 43 +++++ Romanization/LanguageAgnostic/Constants.cs | 26 +++ Romanization/LanguageAgnostic/Readings.cs | 107 +++++++++++ .../{ => LanguageAgnostic}/Utilities.cs | 58 +++++- Romanization/Languages/Chinese/HanyuPinyin.cs | 26 +-- .../Languages/Japanese/KanjiReadings.cs | 24 +-- .../Languages/Japanese/ModifiedHepburn.cs | 103 +++++------ .../Languages/Korean/HanjaReadings.cs | 18 +- .../Languages/Korean/RevisedRomanization.cs | 16 +- Romanization/Languages/Russian/AlaLc.cs | 30 ++-- Romanization/Languages/Russian/BgnPcgn.cs | 170 ++++++++++++++++++ Romanization/Languages/Russian/Bs29791958.cs | 145 +++++++++++++++ .../Languages/Russian/Gost16876711.cs | 133 ++++++++++++++ .../Languages/Russian/Gost16876712.cs | 123 +++++++++++++ .../Languages/Russian/Gost7792000A.cs | 137 ++++++++++++++ .../Languages/Russian/Gost7792000B.cs | 145 +++++++++++++++ Romanization/Languages/Russian/Icao9303.cs | 125 +++++++++++++ Romanization/Languages/Russian/IsoR9.cs | 158 ++++++++++++++++ Romanization/Languages/Russian/RoadSigns.cs | 138 ++++++++++++++ Romanization/Languages/Russian/Russian.cs | 12 +- Romanization/Languages/Russian/Scholarly.cs | 159 ++++++++++++++++ .../RussianTests/AlaLcSystemTests.cs | 24 --- RomanizationTests/RussianTests/AlaLcTests.cs | 36 ++++ .../RussianTests/BgnPcgnTests.cs | 36 ++++ .../RussianTests/Bs29791958Tests.cs | 36 ++++ .../RussianTests/Gost16876711Tests.cs | 36 ++++ .../RussianTests/Gost16876712Tests.cs | 36 ++++ .../RussianTests/Gost7792000ATests.cs | 36 ++++ 
.../RussianTests/Gost7792000BTests.cs | 36 ++++ .../RussianTests/Icao9303Tests.cs | 36 ++++ RomanizationTests/RussianTests/IsoR9Tests.cs | 36 ++++ .../RussianTests/RoadSignsTests.cs | 36 ++++ .../RussianTests/ScholarlyTests.cs | 36 ++++ 37 files changed, 2252 insertions(+), 303 deletions(-) delete mode 100644 Romanization/LanguageAgnostic.cs create mode 100644 Romanization/LanguageAgnostic/CharSub.cs create mode 100644 Romanization/LanguageAgnostic/Constants.cs create mode 100644 Romanization/LanguageAgnostic/Readings.cs rename Romanization/{ => LanguageAgnostic}/Utilities.cs (61%) create mode 100644 Romanization/Languages/Russian/BgnPcgn.cs create mode 100644 Romanization/Languages/Russian/Bs29791958.cs create mode 100644 Romanization/Languages/Russian/Gost16876711.cs create mode 100644 Romanization/Languages/Russian/Gost16876712.cs create mode 100644 Romanization/Languages/Russian/Gost7792000A.cs create mode 100644 Romanization/Languages/Russian/Gost7792000B.cs create mode 100644 Romanization/Languages/Russian/Icao9303.cs create mode 100644 Romanization/Languages/Russian/IsoR9.cs create mode 100644 Romanization/Languages/Russian/RoadSigns.cs create mode 100644 Romanization/Languages/Russian/Scholarly.cs delete mode 100644 RomanizationTests/RussianTests/AlaLcSystemTests.cs create mode 100644 RomanizationTests/RussianTests/AlaLcTests.cs create mode 100644 RomanizationTests/RussianTests/BgnPcgnTests.cs create mode 100644 RomanizationTests/RussianTests/Bs29791958Tests.cs create mode 100644 RomanizationTests/RussianTests/Gost16876711Tests.cs create mode 100644 RomanizationTests/RussianTests/Gost16876712Tests.cs create mode 100644 RomanizationTests/RussianTests/Gost7792000ATests.cs create mode 100644 RomanizationTests/RussianTests/Gost7792000BTests.cs create mode 100644 RomanizationTests/RussianTests/Icao9303Tests.cs create mode 100644 RomanizationTests/RussianTests/IsoR9Tests.cs create mode 100644 RomanizationTests/RussianTests/RoadSignsTests.cs create mode 100644 
RomanizationTests/RussianTests/ScholarlyTests.cs diff --git a/Documentation/articles/supported.md b/Documentation/articles/supported.md index e59db3a..e1d1809 100644 --- a/Documentation/articles/supported.md +++ b/Documentation/articles/supported.md @@ -1,7 +1,7 @@ # Supported Languages and Systems The goal of Romanization.NET is to provide a simple, extensive way to romanize widely-used languages as accurately as possible. -Below is a list of all supported languages and systems, with explanations of caveats and limitations if necessary. +Below is a list of all supported languages and systems, with explanations of caveats and limitations if necessary. Languages are ordered lexicographically. @@ -71,3 +71,71 @@ Only one reading type is supported, which is the Hangeul equivalent pronunciatio #### Additional Notes Because the goal of this package is, as the name suggests, romanization, the implementation also includes a function for first converting the Hanja to Hangeul, then romanizing the Hangeul using the system of your choice. + + + +## Russian +At the time of writing, Russian has no single international standard of romanization/transliteration. Instead different systems are used by different groups for different purposes. As a result, there are many systems all implemented with very similar transliterations. + +### [BGN/PCGN](https://en.wikipedia.org/wiki/BGN/PCGN_romanization_of_Russian) +Developed jointly by the United States Board on Geographic Names and the Permanent Committee on Geographical Names for British Official Use, it is designed to be easier for anglophones to pronounce. + +Because of this, it's likely a solid choice for romanizing text specifically for English speakers (US/CA/UK audience). + + +### [GOST 7.79-2000 System A](https://en.wikipedia.org/wiki/GOST_7.79-2000) / [ISO 9](https://en.wikipedia.org/wiki/ISO_9) +GOST 7.79-2000(A) focuses on mapping one Cyrillic character to one Latin character, potentially with diacritics. 
+ +ISO 9:1995 is the current standard for Slavic transliteration from the ISO, and is based on ISO/R 9:1968. + +The two systems are functionally identical and in this library are combined into one, under the name of GOST 7.79-2000 System A. This is to retain consistency with the other GOST systems included, as it may be strange to have GOST 7.79-2000 System B but have A under a different name. + + +### [GOST 7.79-2000 System B](https://en.wikipedia.org/wiki/GOST_7.79-2000) +In contrast to the above, GOST 7.79-2000(B) focuses on mapping one Cyrillic character to potentially several Latin characters (eg. `щ -> shh`), but without the use of diacritics. + + +### [GOST 16876-71 Table 1 (UNGEGN)](https://en.wikipedia.org/wiki/GOST_16876-71) +GOST 16876-71(1) focuses on mapping one Cyrillic character to one Latin character, potentially with diacritics. + +It was recommended by the [United Nations Group of Experts on Geographical Names (UNGEGN)](https://en.wikipedia.org/wiki/United_Nations_Group_of_Experts_on_Geographical_Names) in 1987. + +GOST 16876-71 was most recently updated in 1980, and was abandoned in favour of GOST 7.79-2000 in 2002 by the Russian Federation. + + +### [GOST 16876-71 Table 2](https://en.wikipedia.org/wiki/GOST_16876-71) +GOST 16876-71(2) is another table in GOST 16876-71, and focuses on mapping one Cyrillic character to potentially several Latin characters (eg. `щ -> shh`), but without the use of diacritics. + + +### [Scholarly/Scientific Transliteration](https://en.wikipedia.org/wiki/Scientific_transliteration_of_Cyrillic) +The Scholarly transliteration system for Russian actually covers many Slavic languages, with Russian being one of them. It tries to preserve pronunciation of the original characters while remaining unambiguous about its transformations. + + +### [ISO Recommendation No. 9 (ISO/R 9:1968)](https://en.wikipedia.org/wiki/ISO_9#ISO/R_9) +Similar to the scholarly system, ISO/R 9 was created in 1954 and updated in 1968. 
It also supports many Slavic languages, and was the ISO's earliest adoption of scholarly transliteration. + + +### [American Library Association and Library of Congress (ALA-LC) System](https://en.wikipedia.org/wiki/ALA-LC_romanization_for_Russian) +This system was initially established in 1904, and remains largely unchanged since 1941. Its primary purpose is in US, Canadian, and British libraries. + +This system uses some diacritics and uses two-letter tie characters for some Cyrillic characters. + + +### [British Standard 2979:1958](https://en.wikipedia.org/wiki/Romanization_of_Russian#British_Standard) +It is the main system of Oxford University Press, and was used by the British Library up until 1975. + +The ALA-LC system is now used by the British Library instead. + + +### [ICAO Doc 9303](https://www.icao.int/publications/Documents/9303_p3_cons_en.pdf) +Created by the International Civil Aviation Organization, a UN agency, the document is designed to make travel documents machine-readable. + +It contains tables for transliteration to Latin characters from many alphabets, including Cyrillic. The system uses no diacritics whatsoever, only standard ASCII characters. + +The system was put into effect by the Russian government in 2013 for all citizen passports. + + +### [General Road Signs](https://en.wikipedia.org/wiki/Romanization_of_Russian#Road_signs_note) +This is the system generally used for romanization for road signs and the like. + +This originally followed GOST 10807-78 (tables 17, 18), but now follows GOST R 52290-2004 (tables Г.4, Г.5). diff --git a/README.md b/README.md index 52e035b..2409114 100644 --- a/README.md +++ b/README.md @@ -6,10 +6,10 @@ A library for [romanization](https://en.wikipedia.org/wiki/Romanization) of widely-used languages using common romanization systems. -Still a work in progress. Originally made as part of the [NUSRipper](https://github.com/zedseven/NusRipper) project. +Still a work in progress. 
## Supported Languages & Documentation -At the moment Romanization.NET supports Chinese, Japanese, and Korean, with individual romanization systems supported for each. +At the moment Romanization.NET supports Chinese, Japanese, Korean, and Russian, with individual romanization systems supported for each. For a comprehensive breakdown of supported languages and systems, [check out the full article](Documentation/articles/supported.md). diff --git a/Romanization/IRomanizationSystem.cs b/Romanization/IRomanizationSystem.cs index eb2a656..5a50831 100644 --- a/Romanization/IRomanizationSystem.cs +++ b/Romanization/IRomanizationSystem.cs @@ -8,6 +8,14 @@ namespace Romanization /// public interface IRomanizationSystem { + /// + /// Whether this is a transliteration system, which is moreso concerned with preserving the characters of a language rather than the sounds.
+ /// Some languages only have transliteration systems.
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/Transliteration + ///
+ public bool TransliterationSystem { get; } + /// /// The system-specific function that romanizes text according to the system's rules. /// diff --git a/Romanization/LanguageAgnostic.cs b/Romanization/LanguageAgnostic.cs deleted file mode 100644 index 660e6ee..0000000 --- a/Romanization/LanguageAgnostic.cs +++ /dev/null @@ -1,157 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Diagnostics.Contracts; -using System.Linq; -using System.Text.RegularExpressions; - -// ReSharper disable CommentTypo - -namespace Romanization -{ - /// - /// A global class for language-agnostic functions and constants (things that are independent of specific languages). - /// - public static class LanguageAgnostic - { - // General Constants - internal const string Vowels = "aeiouy"; - internal const string Consonants = "bcdfghjklmnpqrstvwxz"; - internal const string Punctuation = @"\.?!"; - internal const char IdeographicFullStop = '。'; - internal const char Interpunct = '・'; - - // Replacement Characters - internal const string MacronA = "ā"; - internal const string MacronE = "ē"; - internal const string MacronI = "ī"; - internal const string MacronO = "ō"; - internal const string MacronU = "ū"; - - // Regex Constants - private static readonly Lazy LanguageBoundaryRegex = new Lazy(() => new Regex( - $"(?:([{LanguageBoundaryChars}{Punctuation}])([^ {LanguageBoundaryChars}{Punctuation}])|([^ {LanguageBoundaryChars}{Punctuation}])([{LanguageBoundaryChars}]))", - RegexOptions.Compiled | RegexOptions.IgnoreCase)); - private const string LanguageBoundaryChars = @"a-z"; - private const string LanguageBoundarySubstitution = "${1}${3} ${2}${4}"; - - /// - /// A string of characters with all possible readings (pronunciations) for each character. - /// - /// The reading type enum to use, which contains all supported readings for a given language or system.
For example, .
- public class ReadingsString - where TType : Enum - { - /// - /// The characters of the string.
- /// Each one stores both the character itself (not necessarily equivalent to a char, as some Hànzì characters are double-wide), and all known readings (pronunciations). - ///
- public readonly ReadingCharacter[] Characters; - - internal ReadingsString(ReadingCharacter[] characters) - { - Characters = characters; - } - - // TODO: Add additional ToString() implementations that do display reading types. - /// - /// Returns a string that displays all readings of each character.
- /// For characters with 0 readings, they are displayed simply as themselves.
- /// For characters with 1 reading, they are displayed as their only reading.
- /// For characters with more than 1 reading, they are displayed as a space-delimited list of all readings in order, within square brackets.
- /// Example: "xiàndài [hàn tān][yǔ yù] [pín bīn][shuài lǜ lüe l̈ù] cí[diǎn tiǎn]."
- /// Note that this does not display the source of each reading. - ///
- /// A string with all known readings of each character. - public override string ToString() - => Characters.Aggregate("", (current, character) => current + character.FlattenReadings()); - } - - /// - /// A character with all possible readings (pronunciations). - /// - /// The reading type enum to use, which contains all supported readings for a given language or system.
For example, .
- public class ReadingCharacter - where TType : Enum - { - /// - /// The actual character value.
Note that this is not necessarily one char in length - some Hànzì characters go outside the Basic Multilingual Plane (BMP), and as such take up 32 bits (two 16-bit chars). - ///
- public readonly string Character; - /// - /// The collection of known readings for the character, in order as specified in the function used to generate this object. - /// - public readonly Reading[] Readings; - - internal ReadingCharacter(string character, IEnumerable> readings) - { - Character = character; - Readings = readings.ToArray(); - } - - /// - /// Returns a string that represents the current object.
- /// The format is: '<char>' [<readings>] - ///
- /// A string with the character and all known readings. - public override string ToString() - => $"'{Character}' {FlattenReadings()}"; - - /// - /// Returns a string starting and ending with square brackets, containing all readings in the order they appear in .
- /// If the character has no known readings, the character itself is returned instead.
- /// Example: [shuài lǜ lüe l̈ù]
- /// Note this does not output the source of each reading. - ///
- /// A string representation of all readings of the character, or the character itself if there are none. - public string FlattenReadings() - { - string[] readings = Readings.Select(r => r.Value).Distinct().ToArray(); - if (readings.Length > 1) - return $"[{string.Join(" ", readings)}]"; - return readings.Length == 1 ? readings[0] : Character; - } - } - - /// - /// A reading (pronunciation) of a character. - /// - /// The reading type enum to use, which contains all supported readings for a given language or system.
For example, .
- public class Reading - where TType : Enum - { - /// - /// The type of reading it is. For example, it could be . - /// - public readonly TType Type; - /// - /// The reading itself - a romanized string representing how a character should be pronounced. - /// - public readonly string Value; - - internal Reading(TType type, string value) - { - Type = type; - Value = value; - } - } - - /// - /// Remove common alternative characters, such as the ideographic full-stop (replaced with a period). - /// - /// The text to replace in. - /// The original text with common alternate characters replaced. - [Pure] - internal static string ReplaceCommonAlternates(string text) - => text.Replace(IdeographicFullStop, '.') - .Replace(Interpunct, ' '); - - /// - /// Insert spaces at boundaries between Latin and non-Latin characters (ie. ニンテンドーDSiブラウザー -> ニンテンドー DSi ブラウザー). - /// - /// The text to insert spaces in. - /// The text with spaces inserted at language boundaries. - [Pure] - internal static string SeparateLanguageBoundaries(string text) - => LanguageBoundaryRegex.Value.Replace(text, LanguageBoundarySubstitution); - } -} diff --git a/Romanization/LanguageAgnostic/CharSub.cs b/Romanization/LanguageAgnostic/CharSub.cs new file mode 100644 index 0000000..f733ab5 --- /dev/null +++ b/Romanization/LanguageAgnostic/CharSub.cs @@ -0,0 +1,43 @@ +using System.Text.RegularExpressions; + +namespace Romanization.LanguageAgnostic +{ + internal interface ISub + { + public string Replace(string text); + } + + internal class CharSub : ISub + { + private readonly Regex _findRegex; + private readonly string _substitution; + + public CharSub(string pattern, string substitution, bool ignoreCase = true) + { + _findRegex = new Regex(pattern, ignoreCase ? 
RegexOptions.Compiled | RegexOptions.IgnoreCase : RegexOptions.Compiled); + _substitution = substitution; + } + + public string Replace(string text) + => _findRegex.Replace(text, _substitution); + } + + internal class CharSubCased : ISub + { + private readonly Regex _findRegexUpper; + private readonly Regex _findRegexLower; + private readonly string _substitutionUpper; + private readonly string _substitutionLower; + + public CharSubCased(string patternUpper, string patternLower, string substitutionUpper, string substitutionLower) + { + _findRegexUpper = new Regex(patternUpper, RegexOptions.Compiled); + _findRegexLower = new Regex(patternLower, RegexOptions.Compiled); + _substitutionUpper = substitutionUpper; + _substitutionLower = substitutionLower; + } + + public string Replace(string text) + => _findRegexLower.Replace(_findRegexUpper.Replace(text, _substitutionUpper), _substitutionLower); + } +} diff --git a/Romanization/LanguageAgnostic/Constants.cs b/Romanization/LanguageAgnostic/Constants.cs new file mode 100644 index 0000000..43ae9a1 --- /dev/null +++ b/Romanization/LanguageAgnostic/Constants.cs @@ -0,0 +1,26 @@ +using System.Text.RegularExpressions; + +// ReSharper disable CommentTypo + +namespace Romanization.LanguageAgnostic +{ + /// + /// A global class for language-agnostic functions and constants (things that are independent of specific languages). 
+ /// + internal static class Constants + { + // General Constants + public const string Vowels = "aeiouy"; + public const string Consonants = "bcdfghjklmnpqrstvwxz"; + public const string Punctuation = @"\.?!"; + public const char IdeographicFullStop = '。'; + public const char Interpunct = '・'; + + // Replacement Characters + public const string MacronA = "ā"; + public const string MacronE = "ē"; + public const string MacronI = "ī"; + public const string MacronO = "ō"; + public const string MacronU = "ū"; + } +} diff --git a/Romanization/LanguageAgnostic/Readings.cs b/Romanization/LanguageAgnostic/Readings.cs new file mode 100644 index 0000000..022837e --- /dev/null +++ b/Romanization/LanguageAgnostic/Readings.cs @@ -0,0 +1,107 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Romanization.LanguageAgnostic +{ + /// + /// A string of characters with all possible readings (pronunciations) for each character. + /// + /// The reading type enum to use, which contains all supported readings for a given language or system.
For example, .
+ public class ReadingsString + where TType : Enum + { + /// + /// The characters of the string.
+ /// Each one stores both the character itself (not necessarily equivalent to a char, as some Hànzì characters are double-wide), and all known readings (pronunciations). + ///
+ public readonly ReadingCharacter[] Characters; + + internal ReadingsString(ReadingCharacter[] characters) + { + Characters = characters; + } + + // TODO: Add additional ToString() implementations that do display reading types. + /// + /// Returns a string that displays all readings of each character.
+ /// For characters with 0 readings, they are displayed simply as themselves.
+ /// For characters with 1 reading, they are displayed as their only reading.
+ /// For characters with more than 1 reading, they are displayed as a space-delimited list of all readings in order, within square brackets.
+ /// Example: "xiàndài [hàn tān][yǔ yù] [pín bīn][shuài lǜ lüe l̈ù] cí[diǎn tiǎn]."
+ /// Note that this does not display the source of each reading. + ///
+ /// A string with all known readings of each character. + public override string ToString() + => Characters.Aggregate("", (current, character) => current + character.FlattenReadings()); + } + + /// + /// A character with all possible readings (pronunciations). + /// + /// The reading type enum to use, which contains all supported readings for a given language or system.
For example, .
+ public class ReadingCharacter + where TType : Enum + { + /// + /// The actual character value.
Note that this is not necessarily one char in length - some Hànzì characters go outside the Basic Multilingual Plane (BMP), and as such take up 32 bits (two 16-bit chars). + ///
+ public readonly string Character; + /// + /// The collection of known readings for the character, in order as specified in the function used to generate this object. + /// + public readonly Reading[] Readings; + + internal ReadingCharacter(string character, IEnumerable> readings) + { + Character = character; + Readings = readings.ToArray(); + } + + /// + /// Returns a string that represents the current object.
+ /// The format is: '<char>' [<readings>] + ///
+ /// A string with the character and all known readings. + public override string ToString() + => $"'{Character}' {FlattenReadings()}"; + + /// + /// Returns a string starting and ending with square brackets, containing all readings in the order they appear in .
+ /// If the character has no known readings, the character itself is returned instead.
+ /// Example: [shuài lǜ lüe l̈ù]
+ /// Note this does not output the source of each reading. + ///
+ /// A string representation of all readings of the character, or the character itself if there are none. + public string FlattenReadings() + { + string[] readings = Readings.Select(r => r.Value).Distinct().ToArray(); + if (readings.Length > 1) + return $"[{string.Join(" ", readings)}]"; + return readings.Length == 1 ? readings[0] : Character; + } + } + + /// + /// A reading (pronunciation) of a character. + /// + /// The reading type enum to use, which contains all supported readings for a given language or system.
For example, .
+ public class Reading + where TType : Enum + { + /// + /// The type of reading it is. For example, it could be . + /// + public readonly TType Type; + /// + /// The reading itself - a romanized string representing how a character should be pronounced. + /// + public readonly string Value; + + internal Reading(TType type, string value) + { + Type = type; + Value = value; + } + } +} diff --git a/Romanization/Utilities.cs b/Romanization/LanguageAgnostic/Utilities.cs similarity index 61% rename from Romanization/Utilities.cs rename to Romanization/LanguageAgnostic/Utilities.cs index 6d7882b..1942ca2 100644 --- a/Romanization/Utilities.cs +++ b/Romanization/LanguageAgnostic/Utilities.cs @@ -1,9 +1,12 @@ using System; using System.Collections.Generic; +using System.Diagnostics.Contracts; using System.IO; +using System.Linq; using System.Reflection; +using System.Text.RegularExpressions; -namespace Romanization +namespace Romanization.LanguageAgnostic { internal static class Utilities { @@ -41,8 +44,8 @@ public CsvLoadingException(string message, Exception inner) : base(message, inne /// The dictionary to load into. /// The function that maps CSV entry first values to dictionary values. /// The function that maps CSV entry second values to dictionary values. - /// The provided stream cannot be read. - /// Unable to load the CSV file. + /// The provided stream cannot be read. + /// Unable to load the CSV file. public static void LoadCharacterMap(string fileName, IDictionary dict, Func keyMapper, Func valueMapper) { using FileStream csvStream = File.OpenRead(Path.Combine(LanguageCharacterMapsPath, fileName)); @@ -58,8 +61,8 @@ public static void LoadCharacterMap(string fileName, IDictionaryThe dictionary to load into. /// The function that maps CSV entry first values to dictionary values. /// The function that maps CSV entry second values to dictionary values. - /// The provided stream cannot be read. - /// Unable to load the CSV file. 
+ /// The provided stream cannot be read. + /// Unable to load the CSV file. public static void LoadCsvIntoDictionary(this FileStream stream, IDictionary dict, Func keyMapper, Func valueMapper) { if (!stream.CanRead) @@ -92,9 +95,11 @@ public static void LoadCsvIntoDictionary(this FileStream stream, IDi } } + [Pure] private static string WithoutQuotes(this string str) => str.Length >= 2 && str[0] == '"' && str[^1] == '"' ? str[1..^1] : str; + [Pure] public static string[] SplitIntoSurrogatePairs(this string str) { List retList = new List(str.Length); @@ -111,5 +116,48 @@ public static string[] SplitIntoSurrogatePairs(this string str) return retList.ToArray(); } + + [Pure] + public static string ReplaceMany(this string text, params ISub[] subs) + => subs.Aggregate(text, (str, sub) => sub.Replace(str)); + + [Pure] + public static string ReplaceFromChart(this string text, Dictionary chart) + => chart.Keys.Aggregate(text, (current, key) + => current.Replace(key, chart[key])); + + [Pure] + public static string ReplaceFromChart(this string text, Dictionary chart) + => chart.Keys.Aggregate(text, (current, key) + => current.Replace(key, chart[key])); + + private const string LanguageBoundaryChars = @"a-z"; + private static readonly Lazy LanguageBoundarySubstitution = new Lazy(() => + new CharSub( + $"(?:([{LanguageBoundaryChars}{Constants.Punctuation}])([^ {LanguageBoundaryChars}{Constants.Punctuation}])|([^ {LanguageBoundaryChars}{Constants.Punctuation}])([{LanguageBoundaryChars}]))", + "${1}${3} ${2}${4}")); + + /// + /// Remove common alternative characters, such as the ideographic full-stop (replaced with a period). + /// + /// The text to replace in. + /// The original text with common alternate characters replaced. + [Pure] + internal static string ReplaceCommonAlternates(this string text) + => text.Replace(Constants.IdeographicFullStop, '.') + .Replace(Constants.Interpunct, ' '); + + /// + /// Insert spaces at boundaries between Latin and non-Latin characters (ie. 
ニンテンドーDSiブラウザー -> ニンテンドー DSi ブラウザー). + /// + /// The text to insert spaces in. + /// The text with spaces inserted at language boundaries. + [Pure] + internal static string SeparateLanguageBoundaries(this string text) + => LanguageBoundarySubstitution.Value.Replace(text); + + [Pure] + internal static string WithoutChars(this string charset, string withoutChars) + => withoutChars.Aggregate(charset, (set, withoutChar) => set.Replace($"{withoutChar}", "")); } } diff --git a/Romanization/Languages/Chinese/HanyuPinyin.cs b/Romanization/Languages/Chinese/HanyuPinyin.cs index 642dbf9..c3700ae 100644 --- a/Romanization/Languages/Chinese/HanyuPinyin.cs +++ b/Romanization/Languages/Chinese/HanyuPinyin.cs @@ -1,4 +1,5 @@ -using System; +using Romanization.LanguageAgnostic; +using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; @@ -27,6 +28,9 @@ public static partial class Chinese /// public sealed class HanyuPinyinSystem : IReadingsRomanizationSystem { + /// + public bool TransliterationSystem => false; + /// /// The supported reading types for Hànyǔ Pīnyīn. /// @@ -94,22 +98,22 @@ public string Process(string text) /// /// The text to romanize. /// The reading types to use. - /// A with all readings for each character in . + /// A with all readings for each character in . 
[Pure] - public LanguageAgnostic.ReadingsString ProcessWithReadings(string text, ReadingTypes readingsToUse) - => new LanguageAgnostic.ReadingsString(text.SplitIntoSurrogatePairs() + public ReadingsString ProcessWithReadings(string text, ReadingTypes readingsToUse) + => new ReadingsString(text.SplitIntoSurrogatePairs() .Select(c => { - List> readings = new List>(text.Length); + List> readings = new List>(text.Length); if (readingsToUse.HasFlag(ReadingTypes.HanyuPinyin) && HanyuPinyinReadings.TryGetValue(c, out string[] rawHanyuPinyinReadings)) - readings.AddRange(rawHanyuPinyinReadings.Select(r => new LanguageAgnostic.Reading(ReadingTypes.HanyuPinyin, r))); + readings.AddRange(rawHanyuPinyinReadings.Select(r => new Reading(ReadingTypes.HanyuPinyin, r))); if (readingsToUse.HasFlag(ReadingTypes.HanyuPinlu) && HanyuPinluReadings.TryGetValue(c, out string[] rawHanyuPinluReadings)) - readings.AddRange(rawHanyuPinluReadings.Select(r => new LanguageAgnostic.Reading(ReadingTypes.HanyuPinlu, r))); + readings.AddRange(rawHanyuPinluReadings.Select(r => new Reading(ReadingTypes.HanyuPinlu, r))); if (readingsToUse.HasFlag(ReadingTypes.XHC) && XhcReadings.TryGetValue(c, out string[] rawXhcReadings)) - readings.AddRange(rawXhcReadings.Select(r => new LanguageAgnostic.Reading(ReadingTypes.XHC, r))); + readings.AddRange(rawXhcReadings.Select(r => new Reading(ReadingTypes.XHC, r))); - return new LanguageAgnostic.ReadingCharacter(c, readings); + return new ReadingCharacter(c, readings); }) .ToArray()); @@ -119,9 +123,9 @@ public LanguageAgnostic.ReadingsString ProcessWithReadings(string /// Returns the following readings for characters if they exist: standard Hànyǔ Pīnyīn, Hànyǔ Pīnyīn as it appeared in Xiàndài Hànyǔ Pínlǜ Cídiǎn, and Hànyǔ Pīnyīn as it appeared in Xiàndài Hànyǔ Cídiǎn. /// /// The text to romanize. - /// A with all readings for each character in . + /// A with all readings for each character in . 
[Pure] - public LanguageAgnostic.ReadingsString ProcessWithReadings(string text) + public ReadingsString ProcessWithReadings(string text) => ProcessWithReadings(text, ReadingTypes.HanyuPinyin | ReadingTypes.HanyuPinlu | ReadingTypes.XHC); } } diff --git a/Romanization/Languages/Japanese/KanjiReadings.cs b/Romanization/Languages/Japanese/KanjiReadings.cs index 2d75740..16b9476 100644 --- a/Romanization/Languages/Japanese/KanjiReadings.cs +++ b/Romanization/Languages/Japanese/KanjiReadings.cs @@ -1,4 +1,5 @@ -using System; +using Romanization.LanguageAgnostic; +using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; @@ -27,6 +28,9 @@ public static partial class Japanese /// public sealed class KanjiReadingsSystem : IReadingsRomanizationSystem { + /// + public bool TransliterationSystem => false; + /// /// The supported reading types for Kanji. /// @@ -102,20 +106,20 @@ public string ProcessWithKana(string text, IRomanizationSystem system = null) /// /// The text to romanize. /// The reading types to use. - /// A with all readings for each character in . + /// A with all readings for each character in . 
[Pure] - public LanguageAgnostic.ReadingsString ProcessWithReadings(string text, ReadingTypes readingsToUse) - => new LanguageAgnostic.ReadingsString(text.SplitIntoSurrogatePairs() + public ReadingsString ProcessWithReadings(string text, ReadingTypes readingsToUse) + => new ReadingsString(text.SplitIntoSurrogatePairs() .Select(c => { - List> readings = new List>(text.Length); + List> readings = new List>(text.Length); if (readingsToUse.HasFlag(ReadingTypes.Kunyomi) && KanjiKunReadings.TryGetValue(c, out string[] rawKanjiKunReadings)) - readings.AddRange(rawKanjiKunReadings.Select(r => new LanguageAgnostic.Reading(ReadingTypes.Kunyomi, r))); + readings.AddRange(rawKanjiKunReadings.Select(r => new Reading(ReadingTypes.Kunyomi, r))); if (readingsToUse.HasFlag(ReadingTypes.Onyomi) && KanjiOnReadings.TryGetValue(c, out string[] rawKanjiOnReadings)) - readings.AddRange(rawKanjiOnReadings.Select(r => new LanguageAgnostic.Reading(ReadingTypes.Onyomi, r))); + readings.AddRange(rawKanjiOnReadings.Select(r => new Reading(ReadingTypes.Onyomi, r))); - return new LanguageAgnostic.ReadingCharacter(c, readings); + return new ReadingCharacter(c, readings); }) .ToArray()); @@ -125,9 +129,9 @@ public LanguageAgnostic.ReadingsString ProcessWithReadings(string /// Returns the following readings for characters if they exist: Kun'yomi and On'yomi. /// /// The text to romanize. - /// A with all readings for each character in . + /// A with all readings for each character in . 
[Pure] - public LanguageAgnostic.ReadingsString ProcessWithReadings(string text) + public ReadingsString ProcessWithReadings(string text) => ProcessWithReadings(text, ReadingTypes.Kunyomi | ReadingTypes.Onyomi); } } diff --git a/Romanization/Languages/Japanese/ModifiedHepburn.cs b/Romanization/Languages/Japanese/ModifiedHepburn.cs index 5e6f2af..ac517c1 100644 --- a/Romanization/Languages/Japanese/ModifiedHepburn.cs +++ b/Romanization/Languages/Japanese/ModifiedHepburn.cs @@ -1,8 +1,7 @@ -using System; +using Romanization.LanguageAgnostic; +using System; using System.Collections.Generic; using System.Diagnostics.Contracts; -using System.Linq; -using System.Text.RegularExpressions; // ReSharper disable CheckNamespace // ReSharper disable CommentTypo @@ -28,39 +27,38 @@ public static partial class Japanese /// public sealed class ModifiedHepburnSystem : IRomanizationSystem { + /// + public bool TransliterationSystem => false; + // System-Specific Constants private static readonly Dictionary GojuonChart = new Dictionary(); private static readonly Dictionary YoonChart = new Dictionary(); - private static Regex LongVowelRegexA; - private static Regex LongVowelRegexE; - private static Regex LongVowelRegexI; - private static Regex LongVowelRegexO; - private static Regex LongVowelRegexU; + private static CharSub LongASub; + private static CharSub LongESub; + private static CharSub LongISub; + private static CharSub LongOSub; + private static CharSub LongUSub; - private static Regex SyllabicNVowelsRegex; - private static Regex SyllabicNConsonantsRegex; - private const string SyllabicNVowelsSubstitution = "n'${1}"; - private const string SyllabicNConsonantsSubstitution = "n${1}"; + private static CharSub SyllabicNVowelsSub; + private static CharSub SyllabicNConsonantsSub; - private static Regex SokuonGeneralCaseRegex; - private static Regex SokuonChCaseRegex; - private const string SokuonGeneralCaseSubstitution = "${1}${1}"; - private const string SokuonChCaseSubstitution = 
"tch"; + private static CharSub SokuonGeneralCaseSub; + private static CharSub SokuonChCaseSub; internal ModifiedHepburnSystem() { - LongVowelRegexA = new Regex($"a{Choonpu}", RegexOptions.Compiled); - LongVowelRegexE = new Regex($"e{Choonpu}", RegexOptions.Compiled); - LongVowelRegexI = new Regex($"i{Choonpu}", RegexOptions.Compiled); - LongVowelRegexO = new Regex($"o{Choonpu}", RegexOptions.Compiled); - LongVowelRegexU = new Regex($"u{Choonpu}", RegexOptions.Compiled); + LongASub = new CharSub($"a{Choonpu}", Constants.MacronA, false); + LongESub = new CharSub($"e{Choonpu}", Constants.MacronE, false); + LongISub = new CharSub($"i{Choonpu}", Constants.MacronI, false); + LongOSub = new CharSub($"o{Choonpu}", Constants.MacronO, false); + LongUSub = new CharSub($"u{Choonpu}", Constants.MacronU, false); - SyllabicNVowelsRegex = new Regex($"[{SyllabicNHiragana}{SyllabicNKatakana}]([{LanguageAgnostic.Vowels}])", RegexOptions.Compiled | RegexOptions.IgnoreCase); - SyllabicNConsonantsRegex = new Regex($"[{SyllabicNHiragana}{SyllabicNKatakana}]([{LanguageAgnostic.Consonants}])", RegexOptions.Compiled | RegexOptions.IgnoreCase); + SyllabicNVowelsSub = new CharSub($"[{SyllabicNHiragana}{SyllabicNKatakana}]([{Constants.Vowels}])", "n'${1}"); + SyllabicNConsonantsSub = new CharSub($"[{SyllabicNHiragana}{SyllabicNKatakana}]([{Constants.Consonants}])", "n${1}"); - SokuonGeneralCaseRegex = new Regex($"[{SokuonHiragana}{SokuonKatakana}]([{LanguageAgnostic.Consonants}])", RegexOptions.Compiled | RegexOptions.IgnoreCase); - SokuonChCaseRegex = new Regex($"[{SokuonHiragana}{SokuonKatakana}]ch", RegexOptions.Compiled | RegexOptions.IgnoreCase); + SokuonGeneralCaseSub = new CharSub($"[{SokuonHiragana}{SokuonKatakana}]([{Constants.Consonants}])", "${1}${1}"); + SokuonChCaseSub = new CharSub($"[{SokuonHiragana}{SokuonKatakana}]ch", "tch"); #region Romanization Chart // Sourced from https://en.wikipedia.org/wiki/Hepburn_romanization#Romanization_charts @@ -310,44 +308,23 @@ internal 
ModifiedHepburnSystem() /// A romanized version of the text, leaving unrecognized characters untouched. Note that all romanized text will be lowercase. [Pure] public string Process(string text) - { - // Replace common alternate characters - text = LanguageAgnostic.ReplaceCommonAlternates(text); - - // Insert spaces at boundaries between Latin characters and Japanese ones (ie. ニンテンドーDSiブラウザー) - text = LanguageAgnostic.SeparateLanguageBoundaries(text); - - // Do multi-char combinations first (Yōon) - text = YoonChart.Keys.Aggregate(text, (current, yoonString) - => current.Replace(yoonString, YoonChart[yoonString])); - // Then single-char replacements (Gojūon) - text = GojuonChart.Keys.Aggregate(text, (current, gojuonChar) - => current.Replace(gojuonChar, GojuonChart[gojuonChar])); - - // Convert chōonpu usage in original text into macrons to mark long vowels in a romanized manner - text = LongVowelRegexA.Replace( - LongVowelRegexE.Replace( - LongVowelRegexI.Replace( - LongVowelRegexO.Replace( - LongVowelRegexU.Replace(text, - LanguageAgnostic.MacronU), - LanguageAgnostic.MacronO), - LanguageAgnostic.MacronI), - LanguageAgnostic.MacronE), - LanguageAgnostic.MacronA); - - // Render syllabic n as either "n'" or "n" based on whether or not it preceeds a vowel or consonant, respectively - text = SyllabicNConsonantsRegex.Replace( - SyllabicNVowelsRegex.Replace(text, SyllabicNVowelsSubstitution), - SyllabicNConsonantsSubstitution); - - // Take sokuon usage into account (repeating the following consonant to mark long consonants) - text = SokuonGeneralCaseRegex.Replace( - SokuonChCaseRegex.Replace(text, SokuonChCaseSubstitution), - SokuonGeneralCaseSubstitution); - - return text; - } + => text + // Replace common alternate characters + .ReplaceCommonAlternates() + // Insert spaces at boundaries between Latin characters and Japanese ones (ie. 
ニンテンドーDSiブラウザー) + .SeparateLanguageBoundaries() + // Do multi-char combinations first (Yōon) + .ReplaceFromChart(YoonChart) + // Then single-char replacements (Gojūon) + .ReplaceFromChart(GojuonChart) + // Do special substitutions + .ReplaceMany( + // Convert chōonpu usage in original text into macrons to mark long vowels in a romanized manner + LongASub, LongESub, LongISub, LongOSub, LongUSub, + // Render syllabic n as either "n'" or "n" based on whether or not it precedes a vowel or consonant, respectively + SyllabicNVowelsSub, SyllabicNConsonantsSub, + // Take sokuon usage into account (repeating the following consonant to mark long consonants) + SokuonChCaseSub, SokuonGeneralCaseSub); } } } diff --git a/Romanization/Languages/Korean/HanjaReadings.cs b/Romanization/Languages/Korean/HanjaReadings.cs index 3b96e88..d09970c 100644 --- a/Romanization/Languages/Korean/HanjaReadings.cs +++ b/Romanization/Languages/Korean/HanjaReadings.cs @@ -1,4 +1,5 @@ -using System; +using Romanization.LanguageAgnostic; +using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; @@ -27,6 +28,9 @@ public static partial class Korean /// public sealed class HanjaReadingsSystem : IReadingsRomanizationSystem { + /// + public bool TransliterationSystem => false; + /// /// The supported reading types for Hanja. In this case, Hangeul is the only supported one. /// @@ -89,18 +93,18 @@ public string ProcessToHangeul(string text) /// Returns a collection of all the characters in , but with all readings (pronunciations) of each in Hangeul (the Korean alphabet).
/// /// The text to romanize. - /// A with all readings for each character in . + /// A with all readings for each character in . [Pure] - public LanguageAgnostic.ReadingsString ProcessWithReadings(string text) - => new LanguageAgnostic.ReadingsString(text.SplitIntoSurrogatePairs() + public ReadingsString ProcessWithReadings(string text) + => new ReadingsString(text.SplitIntoSurrogatePairs() .Select(c => { - List> readings = new List>(text.Length); + List> readings = new List>(text.Length); if (HangeulReadings.TryGetValue(c, out char[] rawHanjaHangeulReadings)) - readings.AddRange(rawHanjaHangeulReadings.Select(r => new LanguageAgnostic.Reading(ReadingTypes.Hangeul, r.ToString()))); + readings.AddRange(rawHanjaHangeulReadings.Select(r => new Reading(ReadingTypes.Hangeul, r.ToString()))); - return new LanguageAgnostic.ReadingCharacter(c, readings); + return new ReadingCharacter(c, readings); }) .ToArray()); } diff --git a/Romanization/Languages/Korean/RevisedRomanization.cs b/Romanization/Languages/Korean/RevisedRomanization.cs index 2b06ebb..0dcbe9b 100644 --- a/Romanization/Languages/Korean/RevisedRomanization.cs +++ b/Romanization/Languages/Korean/RevisedRomanization.cs @@ -1,4 +1,5 @@ -using System; +using Romanization.LanguageAgnostic; +using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Linq; @@ -28,6 +29,9 @@ public static partial class Korean /// public sealed class RevisedRomanizationSystem : IRomanizationSystem { + /// + public bool TransliterationSystem => false; + private readonly struct HyphenString { public readonly AspirationString BaseString; @@ -214,11 +218,11 @@ internal RevisedRomanizationSystem() [Pure] public string Process(string text, bool givenName, bool noun = false, bool hyphenateSyllables = false) { - // Replace common alternate characters - text = LanguageAgnostic.ReplaceCommonAlternates(text); - - // Insert spaces at boundaries between Latin characters and Korean ones - text = 
LanguageAgnostic.SeparateLanguageBoundaries(text); + text = text + // Replace common alternate characters + .ReplaceCommonAlternates() + // Insert spaces at boundaries between Latin characters and Korean ones + .SeparateLanguageBoundaries(); // Decompose all syllable blocks in text into their component jamo List jamoList = text.SelectMany(c => diff --git a/Romanization/Languages/Russian/AlaLc.cs b/Romanization/Languages/Russian/AlaLc.cs index 6b2c045..0c1d36e 100644 --- a/Romanization/Languages/Russian/AlaLc.cs +++ b/Romanization/Languages/Russian/AlaLc.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Linq; +using Romanization.LanguageAgnostic; // ReSharper disable CheckNamespace // ReSharper disable CommentTypo @@ -85,7 +86,7 @@ internal AlaLcSystem() RomanizationTable["ф"] = "f"; RomanizationTable["Х"] = "Kh"; RomanizationTable["х"] = "kh"; - RomanizationTable["Ц"] = "T͡S"; + RomanizationTable["Ц"] = "T͡s"; RomanizationTable["ц"] = "t͡s"; RomanizationTable["Ч"] = "Ch"; RomanizationTable["ч"] = "ch"; @@ -93,53 +94,53 @@ internal AlaLcSystem() RomanizationTable["ш"] = "sh"; RomanizationTable["Щ"] = "Shch"; RomanizationTable["щ"] = "shch"; - RomanizationTable["Ъ"] = "\""; - RomanizationTable["ъ"] = "\""; + RomanizationTable["Ъ"] = "ʺ"; + RomanizationTable["ъ"] = "ʺ"; RomanizationTable["Ы"] = "Y"; RomanizationTable["ы"] = "y"; RomanizationTable["Ь"] = "ʹ"; RomanizationTable["ь"] = "ʹ"; RomanizationTable["Э"] = "Ė"; RomanizationTable["э"] = "ė"; - RomanizationTable["Ю"] = "I͡U"; + RomanizationTable["Ю"] = "I͡u"; RomanizationTable["ю"] = "i͡u"; - RomanizationTable["Я"] = "I͡A"; + RomanizationTable["Я"] = "I͡a"; RomanizationTable["я"] = "i͡a"; // Letters eliminated in the orthographic reform of 1918 RomanizationTable["І"] = "І̄"; RomanizationTable["і"] = "ī"; - RomanizationTable["Ѣ"] = "I͡E"; + RomanizationTable["Ѣ"] = "I͡e"; RomanizationTable["ѣ"] = "i͡e"; RomanizationTable["Ѳ"] = "Ḟ"; RomanizationTable["ѳ"] = "ḟ"; RomanizationTable["Ѵ"] 
= "Ẏ"; RomanizationTable["ѵ"] = "ẏ"; - // Other obsolete letters + // Pre-18th century letters RomanizationTable["Є"] = "Ē"; RomanizationTable["є"] = "ē"; - RomanizationTable["Ѥ"] = "I͡E"; + RomanizationTable["Ѥ"] = "I͡e"; RomanizationTable["ѥ"] = "i͡e"; RomanizationTable["Ѕ"] = "Ż"; RomanizationTable["ѕ"] = "ż"; RomanizationTable["Ꙋ"] = "Ū"; RomanizationTable["ꙋ"] = "ū"; - RomanizationTable["Ѿ"] = "Ō͡T"; + RomanizationTable["Ѿ"] = "Ō͡t"; RomanizationTable["ѿ"] = "ō͡t"; RomanizationTable["Ѡ"] = "Ō"; RomanizationTable["ѡ"] = "ō"; RomanizationTable["Ѧ"] = "Ę"; RomanizationTable["ѧ"] = "ę"; - RomanizationTable["Ѯ"] = "K͡S"; + RomanizationTable["Ѯ"] = "K͡s"; RomanizationTable["ѯ"] = "k͡s"; - RomanizationTable["Ѱ"] = "P͡S"; + RomanizationTable["Ѱ"] = "P͡s"; RomanizationTable["ѱ"] = "p͡s"; RomanizationTable["Ѫ"] = "Ǫ"; RomanizationTable["ѫ"] = "ǫ"; - RomanizationTable["Ѩ"] = "I͡Ę"; + RomanizationTable["Ѩ"] = "I͡ę"; RomanizationTable["ѩ"] = "i͡ę"; - RomanizationTable["Ѭ"] = "I͡Ǫ"; + RomanizationTable["Ѭ"] = "I͡ǫ"; RomanizationTable["ѭ"] = "i͡ǫ"; #endregion @@ -151,8 +152,7 @@ internal AlaLcSystem() /// The text to romanize. /// A romanized version of the text, leaving unrecognized characters untouched. 
public string Process(string text) - => RomanizationTable.Keys.Aggregate(text, (current, russianString) - => current.Replace(russianString, RomanizationTable[russianString])); + => text.ReplaceFromChart(RomanizationTable); } } } diff --git a/Romanization/Languages/Russian/BgnPcgn.cs b/Romanization/Languages/Russian/BgnPcgn.cs new file mode 100644 index 0000000..4d3cf08 --- /dev/null +++ b/Romanization/Languages/Russian/BgnPcgn.cs @@ -0,0 +1,170 @@ +using Romanization.LanguageAgnostic; +using System; +using System.Collections.Generic; + +// ReSharper disable CheckNamespace +// ReSharper disable CommentTypo +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo +// ReSharper disable InconsistentNaming + +namespace Romanization +{ + public static partial class Russian + { + /// + /// The BGN/PCGN system of romanization for Russian.
+ /// It was developed by the United States Board on Geographic Names and the Permanent Committee on Geographical Names for British Official Use, and is + /// designed to be easier for anglophones to pronounce.
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/BGN/PCGN_romanization_of_Russian + ///
+ public static readonly Lazy BgnPcgn = new Lazy(() => new BgnPcgnSystem()); + + /// + /// The BGN/PCGN system of romanization for Russian.
+ /// It was developed by the United States Board on Geographic Names and the Permanent Committee on Geographical Names for British Official Use, and is + /// designed to be easier for anglophones to pronounce.
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/BGN/PCGN_romanization_of_Russian + ///
+ public sealed class BgnPcgnSystem : IRomanizationSystem + { + /// + public bool TransliterationSystem => true; + + // System-Specific Constants + private static readonly Dictionary RomanizationTable = new Dictionary(); + private static readonly Dictionary DigraphTable = new Dictionary(); + + private static CharSubCased YeProvisionSub; + private static CharSubCased YoProvisionSub; + private static CharSubCased IDigraphSub; + private static CharSubCased YeryExceptionDigraphSub; + private static CharSubCased YeryVowelsDigraphSub; + private static CharSubCased EConsonantsDigraphSub; + + internal BgnPcgnSystem() + { + YeProvisionSub = new CharSubCased($"(^|\\b|[{RussianVowels}ЙйЪъЬь])Е", $"(^|\\b|[{RussianVowels}ЙйЪъЬь])е", + "${1}Ye", "${1}ye"); + YoProvisionSub = new CharSubCased($"(^|\\b|[{RussianVowels}ЙйЪъЬь])Ё", $"(^|\\b|[{RussianVowels}ЙйЪъЬь])ё", + "${1}Yё", "${1}yё"); + IDigraphSub = new CharSubCased("Й([АаУуЫыЭэ])", "й([АаУуЫыЭэ])", "Y·${1}", "y·${1}"); + YeryExceptionDigraphSub = new CharSubCased("Ы([АаУуЫыЭэ])", "ы([АаУуЫыЭэ])", "Y·${1}", "y·${1}"); + YeryVowelsDigraphSub = + new CharSubCased($"([{RussianVowels}])Ы", $"([{RussianVowels}])ы", "${1}·Y", "${1}·y"); + EConsonantsDigraphSub = + new CharSubCased($"([{RussianConsonants.WithoutChars("Йй")}])Э", $"([{RussianConsonants.WithoutChars("Йй")}])э", + "${1}·E", "${1}·e"); + + #region Romanization Chart + + // Sourced from https://en.wikipedia.org/wiki/BGN/PCGN_romanization_of_Russian + + // Main characters (2021) + RomanizationTable["А"] = "A"; + RomanizationTable["а"] = "a"; + RomanizationTable["Б"] = "B"; + RomanizationTable["б"] = "b"; + RomanizationTable["В"] = "V"; + RomanizationTable["в"] = "v"; + RomanizationTable["Г"] = "G"; + RomanizationTable["г"] = "g"; + RomanizationTable["Д"] = "D"; + RomanizationTable["д"] = "d"; + RomanizationTable["Е"] = "E"; // has special provisions + RomanizationTable["е"] = "e"; // has special provisions + RomanizationTable["Ё"] = "Ë"; // has special provisions + 
RomanizationTable["ё"] = "ë"; // has special provisions + RomanizationTable["Ж"] = "Zh"; + RomanizationTable["ж"] = "zh"; + RomanizationTable["З"] = "Z"; + RomanizationTable["з"] = "z"; + RomanizationTable["И"] = "I"; + RomanizationTable["и"] = "i"; + RomanizationTable["Й"] = "Y"; // has special provisions + RomanizationTable["й"] = "y"; // has special provisions + RomanizationTable["К"] = "K"; + RomanizationTable["к"] = "k"; + RomanizationTable["Л"] = "L"; + RomanizationTable["л"] = "l"; + RomanizationTable["М"] = "M"; + RomanizationTable["м"] = "m"; + RomanizationTable["Н"] = "N"; + RomanizationTable["н"] = "n"; + RomanizationTable["О"] = "O"; + RomanizationTable["о"] = "o"; + RomanizationTable["П"] = "P"; + RomanizationTable["п"] = "p"; + RomanizationTable["Р"] = "R"; + RomanizationTable["р"] = "r"; + RomanizationTable["С"] = "S"; + RomanizationTable["с"] = "s"; + RomanizationTable["Т"] = "T"; + RomanizationTable["т"] = "t"; + RomanizationTable["У"] = "U"; + RomanizationTable["у"] = "u"; + RomanizationTable["Ф"] = "F"; + RomanizationTable["ф"] = "f"; + RomanizationTable["Х"] = "Kh"; + RomanizationTable["х"] = "kh"; + RomanizationTable["Ц"] = "Ts"; + RomanizationTable["ц"] = "ts"; + RomanizationTable["Ч"] = "Ch"; + RomanizationTable["ч"] = "ch"; + RomanizationTable["Ш"] = "Sh"; + RomanizationTable["ш"] = "sh"; + RomanizationTable["Щ"] = "Shch"; + RomanizationTable["щ"] = "shch"; + RomanizationTable["Ъ"] = "ʺ"; + RomanizationTable["ъ"] = "ʺ"; + RomanizationTable["Ы"] = "Y"; // has special provisions + RomanizationTable["ы"] = "y"; // has special provisions + RomanizationTable["Ь"] = "ʹ"; + RomanizationTable["ь"] = "ʹ"; + RomanizationTable["Э"] = "E"; // has special provisions + RomanizationTable["э"] = "e"; // has special provisions + RomanizationTable["Ю"] = "Yu"; + RomanizationTable["ю"] = "yu"; + RomanizationTable["Я"] = "Ya"; + RomanizationTable["я"] = "ya"; + + // Digraphs specific to this system + DigraphTable["Тс"] = "T·s"; + DigraphTable["тс"] = "t·s"; + 
DigraphTable["Шч"] = "Sh·ch"; + DigraphTable["шч"] = "sh·ch"; + + #endregion + } + + /// + /// Performs romanization according to the BGN/PCGN system on the given text. + /// + /// The text to romanize. + /// Whether or not to insert special digraph combinations with interpunct characters (eg. шч -> sh·ch). + /// A romanized version of the text, leaving unrecognized characters untouched. + public string Process(string text, bool useDigraphs) + { + // Digraphs first, if they're to be inserted + if (useDigraphs) + text = text.ReplaceFromChart(DigraphTable) + .ReplaceMany(IDigraphSub, YeryExceptionDigraphSub, YeryVowelsDigraphSub, EConsonantsDigraphSub); + + // Then single characters + return text + .ReplaceMany(YeProvisionSub, YoProvisionSub) + .ReplaceFromChart(RomanizationTable); + } + + /// + /// Performs romanization according to the BGN/PCGN system on the given text, using digraphs. + /// + /// The text to romanize. + /// A romanized version of the text, leaving unrecognized characters untouched. + public string Process(string text) + => Process(text, true); + } + } +} diff --git a/Romanization/Languages/Russian/Bs29791958.cs b/Romanization/Languages/Russian/Bs29791958.cs new file mode 100644 index 0000000..ec39d7b --- /dev/null +++ b/Romanization/Languages/Russian/Bs29791958.cs @@ -0,0 +1,145 @@ +using Romanization.LanguageAgnostic; +using System; +using System.Collections.Generic; + +// ReSharper disable CheckNamespace +// ReSharper disable CommentTypo +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo +// ReSharper disable InconsistentNaming + +namespace Romanization +{ + public static partial class Russian + { + /// + /// The British Standard 2979:1958 system of romanization for Russian.
+ /// It is the main system of Oxford University Press, and was used by the British Library up until 1975. ALA-LC is now used instead.
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/Romanization_of_Russian#British_Standard + ///
+ public static readonly Lazy Bs29791958 = new Lazy(() => new Bs29791958System()); + + /// + /// The British Standard 2979:1958 system of romanization for Russian.
+ /// It is the main system of Oxford University Press, and was used by the British Library up until 1975. ALA-LC is now used instead.
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/Romanization_of_Russian#British_Standard + ///
+ public sealed class Bs29791958System : IRomanizationSystem + { + /// + public bool TransliterationSystem => true; + + // System-Specific Constants + private static readonly Dictionary RomanizationTable = new Dictionary(); + private static readonly Dictionary DigraphTable = new Dictionary(); + + private static CharSub HardSignSub; + + internal Bs29791958System() + { + HardSignSub = new CharSub("[Ъъ]\\b", ""); + + #region Romanization Chart + + // Sourced from https://en.wikipedia.org/wiki/BGN/PCGN_romanization_of_Russian + + // Main characters (2021) + RomanizationTable["А"] = "A"; + RomanizationTable["а"] = "a"; + RomanizationTable["Б"] = "B"; + RomanizationTable["б"] = "b"; + RomanizationTable["В"] = "V"; + RomanizationTable["в"] = "v"; + RomanizationTable["Г"] = "G"; + RomanizationTable["г"] = "g"; + RomanizationTable["Д"] = "D"; + RomanizationTable["д"] = "d"; + RomanizationTable["Е"] = "E"; + RomanizationTable["е"] = "e"; + RomanizationTable["Ё"] = "Ë"; + RomanizationTable["ё"] = "ë"; + RomanizationTable["Ж"] = "Zh"; + RomanizationTable["ж"] = "zh"; + RomanizationTable["З"] = "Z"; + RomanizationTable["з"] = "z"; + RomanizationTable["И"] = "I"; + RomanizationTable["и"] = "i"; + RomanizationTable["Й"] = "Ĭ"; + RomanizationTable["й"] = "ĭ"; + RomanizationTable["К"] = "K"; + RomanizationTable["к"] = "k"; + RomanizationTable["Л"] = "L"; + RomanizationTable["л"] = "l"; + RomanizationTable["М"] = "M"; + RomanizationTable["м"] = "m"; + RomanizationTable["Н"] = "N"; + RomanizationTable["н"] = "n"; + RomanizationTable["О"] = "O"; + RomanizationTable["о"] = "o"; + RomanizationTable["П"] = "P"; + RomanizationTable["п"] = "p"; + RomanizationTable["Р"] = "R"; + RomanizationTable["р"] = "r"; + RomanizationTable["С"] = "S"; + RomanizationTable["с"] = "s"; + RomanizationTable["Т"] = "T"; + RomanizationTable["т"] = "t"; + RomanizationTable["У"] = "U"; + RomanizationTable["у"] = "u"; + RomanizationTable["Ф"] = "F"; + RomanizationTable["ф"] = "f"; + RomanizationTable["Х"] = 
"Kh"; + RomanizationTable["х"] = "kh"; + RomanizationTable["Ц"] = "Ts"; + RomanizationTable["ц"] = "ts"; + RomanizationTable["Ч"] = "Ch"; + RomanizationTable["ч"] = "ch"; + RomanizationTable["Ш"] = "Sh"; + RomanizationTable["ш"] = "sh"; + RomanizationTable["Щ"] = "Shch"; + RomanizationTable["щ"] = "shch"; + RomanizationTable["Ъ"] = "ʺ"; + RomanizationTable["ъ"] = "ʺ"; + RomanizationTable["Ы"] = "Ȳ"; + RomanizationTable["ы"] = "ȳ"; + RomanizationTable["Ь"] = "ʹ"; + RomanizationTable["ь"] = "ʹ"; + RomanizationTable["Э"] = "É"; + RomanizationTable["э"] = "é"; + RomanizationTable["Ю"] = "Yu"; + RomanizationTable["ю"] = "yu"; + RomanizationTable["Я"] = "Ya"; + RomanizationTable["я"] = "ya"; + + // Letters eliminated in the orthographic reform of 1918 + RomanizationTable["І"] = "Ī"; + RomanizationTable["і"] = "ī"; + RomanizationTable["Ѳ"] = "Ḟ"; + RomanizationTable["ѳ"] = "ḟ"; + RomanizationTable["Ѣ"] = "Ê"; + RomanizationTable["ѣ"] = "ê"; + RomanizationTable["Ѵ"] = "Y̆"; + RomanizationTable["ѵ"] = "y̆"; + + // Digraphs specific to this system + DigraphTable["Тс"] = "T-s"; + DigraphTable["тс"] = "t-s"; + + #endregion + } + + /// + /// Performs romanization according to the British Standard 2979:1958 on the given text. + /// + /// The text to romanize. + /// A romanized version of the text, leaving unrecognized characters untouched. 
+ public string Process(string text) + => text + .ReplaceFromChart(DigraphTable) + .ReplaceMany(HardSignSub) + .ReplaceFromChart(RomanizationTable); + } + } +} diff --git a/Romanization/Languages/Russian/Gost16876711.cs b/Romanization/Languages/Russian/Gost16876711.cs new file mode 100644 index 0000000..58f2d6c --- /dev/null +++ b/Romanization/Languages/Russian/Gost16876711.cs @@ -0,0 +1,133 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Romanization.LanguageAgnostic; + +// ReSharper disable CheckNamespace +// ReSharper disable CommentTypo +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo +// ReSharper disable InconsistentNaming + +namespace Romanization +{ + public static partial class Russian + { + /// + /// The GOST 16876-71(1) (UNGEGN) romanization system of Russian.
+ /// This system was recommended by the United Nations Group of Experts on Geographical Names.
+ /// This is Table 1 of the GOST 16876-71 system with 1 Cyrillic to 1 Latin char, with diacritics.
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/GOST_16876-71 + ///
+ public static readonly Lazy Gost16876711 = new Lazy(() => new Gost16876711System()); + + /// + /// The GOST 16876-71(1) (UNGEGN) romanization system of Russian.
+ /// This system was recommended by the United Nations Group of Experts on Geographical Names.
+ /// This is Table 1 of the GOST 16876-71 system with 1 Cyrillic to 1 Latin char, with diacritics.
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/GOST_16876-71 + ///
+ public sealed class Gost16876711System : IRomanizationSystem + { + /// + public bool TransliterationSystem => true; + + // System-Specific Constants + private static readonly Dictionary RomanizationTable = new Dictionary(); + + internal Gost16876711System() + { + #region Romanization Chart + + // Sourced from https://en.wikipedia.org/wiki/Romanization_of_Russian and https://en.wikipedia.org/wiki/GOST_16876-71 + + RomanizationTable['А'] = 'A'; + RomanizationTable['а'] = 'a'; + RomanizationTable['Б'] = 'B'; + RomanizationTable['б'] = 'b'; + RomanizationTable['В'] = 'V'; + RomanizationTable['в'] = 'v'; + RomanizationTable['Г'] = 'G'; + RomanizationTable['г'] = 'g'; + RomanizationTable['Д'] = 'D'; + RomanizationTable['д'] = 'd'; + RomanizationTable['Е'] = 'E'; + RomanizationTable['е'] = 'e'; + RomanizationTable['Ё'] = 'Ë'; + RomanizationTable['ё'] = 'ë'; + RomanizationTable['Ж'] = 'Ž'; + RomanizationTable['ж'] = 'ž'; + RomanizationTable['З'] = 'Z'; + RomanizationTable['з'] = 'z'; + RomanizationTable['И'] = 'I'; + RomanizationTable['и'] = 'i'; + RomanizationTable['Й'] = 'J'; + RomanizationTable['й'] = 'j'; + RomanizationTable['К'] = 'K'; + RomanizationTable['к'] = 'k'; + RomanizationTable['Л'] = 'L'; + RomanizationTable['л'] = 'l'; + RomanizationTable['М'] = 'M'; + RomanizationTable['м'] = 'm'; + RomanizationTable['Н'] = 'N'; + RomanizationTable['н'] = 'n'; + RomanizationTable['О'] = 'O'; + RomanizationTable['о'] = 'o'; + RomanizationTable['П'] = 'P'; + RomanizationTable['п'] = 'p'; + RomanizationTable['Р'] = 'R'; + RomanizationTable['р'] = 'r'; + RomanizationTable['С'] = 'S'; + RomanizationTable['с'] = 's'; + RomanizationTable['Т'] = 'T'; + RomanizationTable['т'] = 't'; + RomanizationTable['У'] = 'U'; + RomanizationTable['у'] = 'u'; + RomanizationTable['Ф'] = 'F'; + RomanizationTable['ф'] = 'f'; + RomanizationTable['Х'] = 'H'; + RomanizationTable['х'] = 'h'; + RomanizationTable['Ц'] = 'C'; + RomanizationTable['ц'] = 'c'; + RomanizationTable['Ч'] = 'Č'; + 
RomanizationTable['ч'] = 'č'; + RomanizationTable['Ш'] = 'Š'; + RomanizationTable['ш'] = 'š'; + RomanizationTable['Щ'] = 'Ŝ'; + RomanizationTable['щ'] = 'ŝ'; + RomanizationTable['Ъ'] = 'ʺ'; + RomanizationTable['ъ'] = 'ʺ'; + RomanizationTable['Ы'] = 'Y'; + RomanizationTable['ы'] = 'y'; + RomanizationTable['Ь'] = 'ʹ'; + RomanizationTable['ь'] = 'ʹ'; + RomanizationTable['Э'] = 'Ė'; + RomanizationTable['э'] = 'ė'; + RomanizationTable['Ю'] = 'Û'; + RomanizationTable['ю'] = 'û'; + RomanizationTable['Я'] = 'Â'; + RomanizationTable['я'] = 'â'; + RomanizationTable['І'] = 'I'; + RomanizationTable['і'] = 'i'; + RomanizationTable['Ѳ'] = 'Ḟ'; + RomanizationTable['ѳ'] = 'ḟ'; + RomanizationTable['Ѣ'] = 'Ě'; + RomanizationTable['ѣ'] = 'ě'; + RomanizationTable['Ѵ'] = 'Ẏ'; + RomanizationTable['ѵ'] = 'ẏ'; + + #endregion + } + + /// + /// Performs GOST 16876-71(1) romanization on Russian text. + /// + /// The text to romanize. + /// A romanized version of the text, leaving unrecognized characters untouched. + public string Process(string text) + => text.ReplaceFromChart(RomanizationTable); + } + } +} diff --git a/Romanization/Languages/Russian/Gost16876712.cs b/Romanization/Languages/Russian/Gost16876712.cs new file mode 100644 index 0000000..f874154 --- /dev/null +++ b/Romanization/Languages/Russian/Gost16876712.cs @@ -0,0 +1,123 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Romanization.LanguageAgnostic; + +// ReSharper disable CheckNamespace +// ReSharper disable CommentTypo +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo +// ReSharper disable InconsistentNaming + +namespace Romanization +{ + public static partial class Russian + { + /// + /// The GOST 16876-71(2) romanization system of Russian.
+ /// This is Table 2 of the GOST 16876-71 system with 1 Cyrillic to potentially many Latin chars, without diacritics.
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/GOST_16876-71 + ///
+ public static readonly Lazy Gost16876712 = new Lazy(() => new Gost16876712System()); + + /// + /// The GOST 16876-71(2) romanization system of Russian.
+ /// This is Table 2 of the GOST 16876-71 system with 1 Cyrillic to potentially many Latin chars, without diacritics.
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/GOST_16876-71 + ///
+ public sealed class Gost16876712System : IRomanizationSystem + { + /// + public bool TransliterationSystem => true; + + // System-Specific Constants + private static readonly Dictionary RomanizationTable = new Dictionary(); + + internal Gost16876712System() + { + #region Romanization Chart + + // Sourced from https://en.wikipedia.org/wiki/Romanization_of_Russian and https://en.wikipedia.org/wiki/GOST_16876-71 + + RomanizationTable["А"] = "A"; + RomanizationTable["а"] = "a"; + RomanizationTable["Б"] = "B"; + RomanizationTable["б"] = "b"; + RomanizationTable["В"] = "V"; + RomanizationTable["в"] = "v"; + RomanizationTable["Г"] = "G"; + RomanizationTable["г"] = "g"; + RomanizationTable["Д"] = "D"; + RomanizationTable["д"] = "d"; + RomanizationTable["Е"] = "E"; + RomanizationTable["е"] = "e"; + RomanizationTable["Ё"] = "Jo"; + RomanizationTable["ё"] = "jo"; + RomanizationTable["Ж"] = "Zh"; + RomanizationTable["ж"] = "zh"; + RomanizationTable["З"] = "Z"; + RomanizationTable["з"] = "z"; + RomanizationTable["И"] = "I"; + RomanizationTable["и"] = "i"; + RomanizationTable["Й"] = "J"; + RomanizationTable["й"] = "j"; + RomanizationTable["К"] = "K"; + RomanizationTable["к"] = "k"; + RomanizationTable["Л"] = "L"; + RomanizationTable["л"] = "l"; + RomanizationTable["М"] = "M"; + RomanizationTable["м"] = "m"; + RomanizationTable["Н"] = "N"; + RomanizationTable["н"] = "n"; + RomanizationTable["О"] = "O"; + RomanizationTable["о"] = "o"; + RomanizationTable["П"] = "P"; + RomanizationTable["п"] = "p"; + RomanizationTable["Р"] = "R"; + RomanizationTable["р"] = "r"; + RomanizationTable["С"] = "S"; + RomanizationTable["с"] = "s"; + RomanizationTable["Т"] = "T"; + RomanizationTable["т"] = "t"; + RomanizationTable["У"] = "U"; + RomanizationTable["у"] = "u"; + RomanizationTable["Ф"] = "F"; + RomanizationTable["ф"] = "f"; + RomanizationTable["Х"] = "Kh"; + RomanizationTable["х"] = "kh"; + RomanizationTable["Ц"] = "C"; + RomanizationTable["ц"] = "c"; + RomanizationTable["Ч"] = "Ch"; + 
RomanizationTable["ч"] = "ch"; + RomanizationTable["Ш"] = "Sh"; + RomanizationTable["ш"] = "sh"; + RomanizationTable["Щ"] = "Shh"; + RomanizationTable["щ"] = "shh"; + RomanizationTable["Ъ"] = "ʺ"; + RomanizationTable["ъ"] = "ʺ"; + RomanizationTable["Ы"] = "Y"; + RomanizationTable["ы"] = "y"; + RomanizationTable["Ь"] = "ʹ"; + RomanizationTable["ь"] = "ʹ"; + RomanizationTable["Э"] = "Eh"; + RomanizationTable["э"] = "eh"; + RomanizationTable["Ю"] = "Ju"; + RomanizationTable["ю"] = "ju"; + RomanizationTable["Я"] = "Ja"; + RomanizationTable["я"] = "ja"; + + #endregion + } + + /// + /// Performs GOST 16876-71(2) romanization on Russian text. + /// + /// The text to romanize. + /// A romanized version of the text, leaving unrecognized characters untouched. + public string Process(string text) + => text.ReplaceFromChart(RomanizationTable); + } + } +} diff --git a/Romanization/Languages/Russian/Gost7792000A.cs b/Romanization/Languages/Russian/Gost7792000A.cs new file mode 100644 index 0000000..d19202c --- /dev/null +++ b/Romanization/Languages/Russian/Gost7792000A.cs @@ -0,0 +1,137 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using Romanization.LanguageAgnostic; + +// ReSharper disable CheckNamespace +// ReSharper disable CommentTypo +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo +// ReSharper disable InconsistentNaming + +namespace Romanization +{ + public static partial class Russian + { + /// + /// The GOST 7.79-2000(A) romanization system of Russian.
+ /// This is System A of the GOST 7.79-2000 system with 1 Cyrillic to 1 Latin char, with diacritics.
+ /// Identical to ISO 9:1995 (different to ISO/R 9:1968).
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/GOST_7.79-2000 + ///
+        // Lazily constructed singleton; the system's table is only built on first access to .Value.
+        public static readonly Lazy<Gost7792000ASystem> Gost7792000A = new Lazy<Gost7792000ASystem>(() => new Gost7792000ASystem());
+
+        ///
+        /// The GOST 7.79-2000(A) romanization system of Russian.
+ /// This is System A of the GOST 7.79-2000 system with 1 Cyrillic to 1 Latin char, with diacritics.
+ /// Identical to ISO 9:1995 (different to ISO/R 9:1968).
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/GOST_7.79-2000 + ///
+        public sealed class Gost7792000ASystem : IRomanizationSystem
+        {
+            /// <inheritdoc />
+            public bool TransliterationSystem => true;
+
+            // System-Specific Constants
+            // Maps one Cyrillic letter (as a string) to its Latin replacement.
+            private static readonly Dictionary<string, string> RomanizationTable = new Dictionary<string, string>();
+
+            internal Gost7792000ASystem()
+            {
+                #region Romanization Chart
+
+                // Sourced from https://en.wikipedia.org/wiki/Romanization_of_Russian and https://en.wikipedia.org/wiki/GOST_7.79-2000
+
+                RomanizationTable["А"] = "A";
+                RomanizationTable["а"] = "a";
+                RomanizationTable["Б"] = "B";
+                RomanizationTable["б"] = "b";
+                RomanizationTable["В"] = "V";
+                RomanizationTable["в"] = "v";
+                RomanizationTable["Г"] = "G";
+                RomanizationTable["г"] = "g";
+                RomanizationTable["Д"] = "D";
+                RomanizationTable["д"] = "d";
+                RomanizationTable["Е"] = "E";
+                RomanizationTable["е"] = "e";
+                RomanizationTable["Ё"] = "Ë";
+                RomanizationTable["ё"] = "ë";
+                RomanizationTable["Ж"] = "Ž";
+                RomanizationTable["ж"] = "ž";
+                RomanizationTable["З"] = "Z";
+                RomanizationTable["з"] = "z";
+                RomanizationTable["И"] = "I";
+                RomanizationTable["и"] = "i";
+                RomanizationTable["Й"] = "J";
+                RomanizationTable["й"] = "j";
+                RomanizationTable["К"] = "K";
+                RomanizationTable["к"] = "k";
+                RomanizationTable["Л"] = "L";
+                RomanizationTable["л"] = "l";
+                RomanizationTable["М"] = "M";
+                RomanizationTable["м"] = "m";
+                RomanizationTable["Н"] = "N";
+                RomanizationTable["н"] = "n";
+                RomanizationTable["О"] = "O";
+                RomanizationTable["о"] = "o";
+                RomanizationTable["П"] = "P";
+                RomanizationTable["п"] = "p";
+                RomanizationTable["Р"] = "R";
+                RomanizationTable["р"] = "r";
+                RomanizationTable["С"] = "S";
+                RomanizationTable["с"] = "s";
+                RomanizationTable["Т"] = "T";
+                RomanizationTable["т"] = "t";
+                RomanizationTable["У"] = "U";
+                RomanizationTable["у"] = "u";
+                RomanizationTable["Ф"] = "F";
+                RomanizationTable["ф"] = "f";
+                RomanizationTable["Х"] = "H";
+                RomanizationTable["х"] = "h";
+                RomanizationTable["Ц"] = "C";
+                RomanizationTable["ц"] = "c";
+                RomanizationTable["Ч"] = "Č";
+                RomanizationTable["ч"] = "č";
+                RomanizationTable["Ш"] = "Š";
+                RomanizationTable["ш"] = "š";
+                RomanizationTable["Щ"] = "Ŝ";
+                RomanizationTable["щ"] = "ŝ";
+                RomanizationTable["Ъ"] = "ʺ";
+                RomanizationTable["ъ"] = "ʺ";
+                RomanizationTable["Ы"] = "Y";
+                RomanizationTable["ы"] = "y";
+                RomanizationTable["Ь"] = "ʹ";
+                RomanizationTable["ь"] = "ʹ";
+                RomanizationTable["Э"] = "È";
+                RomanizationTable["э"] = "è";
+                RomanizationTable["Ю"] = "Û";
+                RomanizationTable["ю"] = "û";
+                RomanizationTable["Я"] = "Â";
+                RomanizationTable["я"] = "â";
+                RomanizationTable["І"] = "Ì";
+                RomanizationTable["і"] = "ì";
+                RomanizationTable["Ѳ"] = "F̀";
+                RomanizationTable["ѳ"] = "f̀";
+                RomanizationTable["Ѣ"] = "Ě";
+                RomanizationTable["ѣ"] = "ě";
+                RomanizationTable["Ѵ"] = "Ỳ";
+                RomanizationTable["ѵ"] = "ỳ";
+                RomanizationTable["Ѕ"] = "Ẑ";
+                RomanizationTable["ѕ"] = "ẑ";
+                RomanizationTable["Ѫ"] = "Ǎ";
+                RomanizationTable["ѫ"] = "ǎ";
+
+                #endregion
+            }
+
+            /// <summary>
+            /// Performs GOST 7.79-2000(A) romanization on Russian text.
+            /// </summary>
+            /// <param name="text">The text to romanize.</param>
+            /// <returns>A romanized version of the text, leaving unrecognized characters untouched.</returns>
+            public string Process(string text)
+                => text.ReplaceFromChart(RomanizationTable);
+        }
+    }
+}
diff --git a/Romanization/Languages/Russian/Gost7792000B.cs b/Romanization/Languages/Russian/Gost7792000B.cs
new file mode 100644
index 0000000..3406388
--- /dev/null
+++ b/Romanization/Languages/Russian/Gost7792000B.cs
@@ -0,0 +1,145 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text.RegularExpressions;
+using Romanization.LanguageAgnostic;
+
+// ReSharper disable CheckNamespace
+// ReSharper disable CommentTypo
+// ReSharper disable IdentifierTypo
+// ReSharper disable StringLiteralTypo
+// ReSharper disable InconsistentNaming
+
+namespace Romanization
+{
+    public static partial class Russian
+    {
+        ///
+        /// The GOST 7.79-2000(B) romanization system of Russian.
+ /// This is System B of the GOST 7.79-2000 system with 1 Cyrillic to potentially many Latin chars, without diacritics.
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/GOST_7.79-2000 + ///
+        // Lazily constructed singleton; the system's table is only built on first access to .Value.
+        public static readonly Lazy<Gost7792000BSystem> Gost7792000B = new Lazy<Gost7792000BSystem>(() => new Gost7792000BSystem());
+
+        ///
+        /// The GOST 7.79-2000(B) romanization system of Russian.
+ /// This is System B of the GOST 7.79-2000 system with 1 Cyrillic to potentially many Latin chars, without diacritics.
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/GOST_7.79-2000 + ///
+        public sealed class Gost7792000BSystem : IRomanizationSystem
+        {
+            /// <inheritdoc />
+            public bool TransliterationSystem => true;
+
+            // System-Specific Constants
+            // Maps one Cyrillic letter (as a string) to its Latin replacement. Tse (Ц/ц) is deliberately absent:
+            // its rendering depends on the following Latin letter, so it is handled by the two substitutions below.
+            private static readonly Dictionary<string, string> RomanizationTable = new Dictionary<string, string>();
+
+            private static LanguageAgnostic.CharSubCased TseVowelsSub;
+            private static LanguageAgnostic.CharSubCased TseConsonantsSub;
+
+            internal Gost7792000BSystem()
+            {
+                // "c" before e, i, j, y (either case, so all-caps input is handled too).
+                TseVowelsSub = new LanguageAgnostic.CharSubCased("Ц([eijyEIJY])", "ц([eijyEIJY])", "C${1}", "c${1}");
+                // "cz" everywhere else. A negative lookahead is used instead of a list of following letters so that
+                // a tse at the end of a word, or before punctuation/whitespace, is no longer left as Cyrillic.
+                TseConsonantsSub = new LanguageAgnostic.CharSubCased("Ц(?![eijyEIJY])",
+                    "ц(?![eijyEIJY])", "Cz", "cz");
+
+                #region Romanization Chart
+
+                // Sourced from https://en.wikipedia.org/wiki/Romanization_of_Russian and https://en.wikipedia.org/wiki/GOST_7.79-2000
+
+                RomanizationTable["А"] = "A";
+                RomanizationTable["а"] = "a";
+                RomanizationTable["Б"] = "B";
+                RomanizationTable["б"] = "b";
+                RomanizationTable["В"] = "V";
+                RomanizationTable["в"] = "v";
+                RomanizationTable["Г"] = "G";
+                RomanizationTable["г"] = "g";
+                RomanizationTable["Д"] = "D";
+                RomanizationTable["д"] = "d";
+                RomanizationTable["Е"] = "E";
+                RomanizationTable["е"] = "e";
+                RomanizationTable["Ё"] = "Yo";
+                RomanizationTable["ё"] = "yo";
+                RomanizationTable["Ж"] = "Zh";
+                RomanizationTable["ж"] = "zh";
+                RomanizationTable["З"] = "Z";
+                RomanizationTable["з"] = "z";
+                RomanizationTable["И"] = "I";
+                RomanizationTable["и"] = "i";
+                RomanizationTable["Й"] = "J";
+                RomanizationTable["й"] = "j";
+                RomanizationTable["К"] = "K";
+                RomanizationTable["к"] = "k";
+                RomanizationTable["Л"] = "L";
+                RomanizationTable["л"] = "l";
+                RomanizationTable["М"] = "M";
+                RomanizationTable["м"] = "m";
+                RomanizationTable["Н"] = "N";
+                RomanizationTable["н"] = "n";
+                RomanizationTable["О"] = "O";
+                RomanizationTable["о"] = "o";
+                RomanizationTable["П"] = "P";
+                RomanizationTable["п"] = "p";
+                RomanizationTable["Р"] = "R";
+                RomanizationTable["р"] = "r";
+                RomanizationTable["С"] = "S";
+                RomanizationTable["с"] = "s";
+                RomanizationTable["Т"] = "T";
+                RomanizationTable["т"] = "t";
+                RomanizationTable["У"] = "U";
+                RomanizationTable["у"] = "u";
+                RomanizationTable["Ф"] = "F";
+                RomanizationTable["ф"] = "f";
+                RomanizationTable["Х"] = "X";
+                RomanizationTable["х"] = "x";
+                //RomanizationTable["Ц"] = "Cz";
+                //RomanizationTable["ц"] = "cz";
+                RomanizationTable["Ч"] = "Ch";
+                RomanizationTable["ч"] = "ch";
+                RomanizationTable["Ш"] = "Sh";
+                RomanizationTable["ш"] = "sh";
+                RomanizationTable["Щ"] = "Shh";
+                RomanizationTable["щ"] = "shh";
+                RomanizationTable["Ъ"] = "ʺ";
+                RomanizationTable["ъ"] = "ʺ";
+                RomanizationTable["Ы"] = "Y";
+                RomanizationTable["ы"] = "y";
+                RomanizationTable["Ь"] = "ʹ"; // was "", which silently dropped the capital soft sign unlike the lowercase entry
+                RomanizationTable["ь"] = "ʹ";
+                RomanizationTable["Э"] = "E`";
+                RomanizationTable["э"] = "e`";
+                RomanizationTable["Ю"] = "Yu";
+                RomanizationTable["ю"] = "yu";
+                RomanizationTable["Я"] = "Ya";
+                RomanizationTable["я"] = "ya";
+                RomanizationTable["Ѣ"] = "Ye";
+                RomanizationTable["ѣ"] = "ye";
+                RomanizationTable["І"] = "I";
+                RomanizationTable["і"] = "i";
+                RomanizationTable["Ѳ"] = "Fh";
+                RomanizationTable["ѳ"] = "fh";
+                RomanizationTable["Ѵ"] = "Yh";
+                RomanizationTable["ѵ"] = "yh";
+                RomanizationTable["Ѕ"] = "Js";
+                RomanizationTable["ѕ"] = "js";
+
+                #endregion
+            }
+
+            /// <summary>
+            /// Performs GOST 7.79-2000(B) romanization on Russian text.
+            /// </summary>
+            /// <param name="text">The text to romanize.</param>
+            /// <returns>A romanized version of the text, leaving unrecognized characters untouched.</returns>
+            public string Process(string text)
+                => text
+                    // Do romanization replacements
+                    .ReplaceFromChart(RomanizationTable)
+                    // Render tse (Ц/ц) as "c" if in front of e, i, j, or y, and as "cz" otherwise
+                    .ReplaceMany(TseVowelsSub, TseConsonantsSub);
+        }
+    }
+}
diff --git a/Romanization/Languages/Russian/Icao9303.cs b/Romanization/Languages/Russian/Icao9303.cs
new file mode 100644
index 0000000..625ba9c
--- /dev/null
+++ b/Romanization/Languages/Russian/Icao9303.cs
@@ -0,0 +1,125 @@
+using Romanization.LanguageAgnostic;
+using System;
+using System.Collections.Generic;
+
+// ReSharper disable CheckNamespace
+// ReSharper disable CommentTypo
+// ReSharper disable IdentifierTypo
+// ReSharper disable StringLiteralTypo
+// ReSharper disable InconsistentNaming
+
+namespace Romanization
+{
+    public static partial class Russian
+    {
+        ///
+        /// The system from ICAO Doc 9303 "Machine Readable Travel Documents, Part 3".
+ /// This is the standard for modern Russian passports, in 2021.
+ /// For more information, visit: + /// https://www.icao.int/publications/Documents/9303_p3_cons_en.pdf + ///
+        // Lazily constructed singleton; the system's table is only built on first access to .Value.
+        public static readonly Lazy<Icao9303System> Icao9303 = new Lazy<Icao9303System>(() => new Icao9303System());
+
+        ///
+        /// The system from ICAO Doc 9303 "Machine Readable Travel Documents, Part 3".
+ /// This is the standard for modern Russian passports, in 2021.
+ /// For more information, visit: + /// https://www.icao.int/publications/Documents/9303_p3_cons_en.pdf + ///
+        public sealed class Icao9303System : IRomanizationSystem
+        {
+            /// <inheritdoc />
+            public bool TransliterationSystem => true;
+
+            // System-Specific Constants
+            // Maps one Cyrillic letter (as a string) to its Latin replacement.
+            private static readonly Dictionary<string, string> RomanizationTable = new Dictionary<string, string>();
+
+            internal Icao9303System()
+            {
+                #region Romanization Chart
+
+                // Sourced from https://en.wikipedia.org/wiki/Scientific_transliteration_of_Cyrillic
+                // NOTE(review): that URL is the scholarly-transliteration article; confirm whether the ICAO document
+                // (https://www.icao.int/publications/Documents/9303_p3_cons_en.pdf) was the intended source.
+
+                // Main characters (2021)
+                RomanizationTable["А"] = "A";
+                RomanizationTable["а"] = "a";
+                RomanizationTable["Б"] = "B";
+                RomanizationTable["б"] = "b";
+                RomanizationTable["В"] = "V";
+                RomanizationTable["в"] = "v";
+                RomanizationTable["Г"] = "G";
+                RomanizationTable["г"] = "g";
+                RomanizationTable["Д"] = "D";
+                RomanizationTable["д"] = "d";
+                RomanizationTable["Е"] = "E";
+                RomanizationTable["е"] = "e";
+                RomanizationTable["Ё"] = "E";
+                RomanizationTable["ё"] = "e";
+                RomanizationTable["Ж"] = "Zh";
+                RomanizationTable["ж"] = "zh";
+                RomanizationTable["З"] = "Z";
+                RomanizationTable["з"] = "z";
+                RomanizationTable["И"] = "I";
+                RomanizationTable["и"] = "i";
+                RomanizationTable["Й"] = "I";
+                RomanizationTable["й"] = "i";
+                RomanizationTable["\u0406"] = "I"; // Cyrillic І (U+0406); the key was previously a Latin "I"
+                RomanizationTable["і"] = "i";
+                RomanizationTable["К"] = "K";
+                RomanizationTable["к"] = "k";
+                RomanizationTable["Л"] = "L";
+                RomanizationTable["л"] = "l";
+                RomanizationTable["М"] = "M";
+                RomanizationTable["м"] = "m";
+                RomanizationTable["Н"] = "N";
+                RomanizationTable["н"] = "n";
+                RomanizationTable["О"] = "O";
+                RomanizationTable["о"] = "o";
+                RomanizationTable["П"] = "P";
+                RomanizationTable["п"] = "p";
+                RomanizationTable["Р"] = "R";
+                RomanizationTable["р"] = "r";
+                RomanizationTable["С"] = "S";
+                RomanizationTable["с"] = "s";
+                RomanizationTable["Т"] = "T";
+                RomanizationTable["т"] = "t";
+                RomanizationTable["У"] = "U";
+                RomanizationTable["у"] = "u";
+                RomanizationTable["Ф"] = "F";
+                RomanizationTable["ф"] = "f";
+                RomanizationTable["Х"] = "Kh";
+                RomanizationTable["х"] = "kh";
+                RomanizationTable["Ц"] = "Ts";
+                RomanizationTable["ц"] = "ts";
+                RomanizationTable["Ч"] = "Ch";
+                RomanizationTable["ч"] = "ch";
+                RomanizationTable["Ш"] = "Sh";
+                RomanizationTable["ш"] = "sh";
+                RomanizationTable["Щ"] = "Shch";
+                RomanizationTable["щ"] = "shch";
+                RomanizationTable["Ъ"] = "Ie";
+                RomanizationTable["ъ"] = "ie";
+                RomanizationTable["Ы"] = "Y";
+                RomanizationTable["ы"] = "y";
+                RomanizationTable["Ь"] = "";
+                RomanizationTable["ь"] = "";
+                RomanizationTable["Э"] = "E";
+                RomanizationTable["э"] = "e";
+                RomanizationTable["Ю"] = "Iu";
+                RomanizationTable["ю"] = "iu";
+                RomanizationTable["Я"] = "Ia";
+                RomanizationTable["я"] = "ia";
+
+                #endregion
+            }
+
+            /// <summary>
+            /// Performs romanization according to ICAO Doc 9303 on the given text.
+            /// </summary>
+            /// <param name="text">The text to romanize.</param>
+            /// <returns>A romanized version of the text, leaving unrecognized characters untouched.</returns>
+            public string Process(string text)
+                => text.ReplaceFromChart(RomanizationTable);
+        }
+    }
+}
diff --git a/Romanization/Languages/Russian/IsoR9.cs b/Romanization/Languages/Russian/IsoR9.cs
new file mode 100644
index 0000000..fd5f2a3
--- /dev/null
+++ b/Romanization/Languages/Russian/IsoR9.cs
@@ -0,0 +1,158 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Romanization.LanguageAgnostic;
+
+// ReSharper disable CheckNamespace
+// ReSharper disable CommentTypo
+// ReSharper disable IdentifierTypo
+// ReSharper disable StringLiteralTypo
+// ReSharper disable InconsistentNaming
+
+namespace Romanization
+{
+    public static partial class Russian
+    {
+        ///
+        /// The ISO Recommendation No. 9 (ISO/R 9:1968) system of romanization, specialized for Russian.
+ /// This transliteration table is designed to cover Bulgarian, Russian, Belarusian, Ukrainian, Serbo-Croatian and Macedonian in general, with regional specializations for certain languages.
+ /// This is largely superseded by ISO 9 (GOST 7.79-2000(A)).
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/ISO_9#ISO/R_9 + ///
+        // Lazily constructed singleton; the system's table is only built on first access to .Value.
+        public static readonly Lazy<IsoR9System> IsoR9 = new Lazy<IsoR9System>(() => new IsoR9System());
+
+        ///
+        /// The ISO Recommendation No. 9 (ISO/R 9:1968) system of romanization, specialized for Russian.
+ /// This transliteration table is designed to cover Bulgarian, Russian, Belarusian, Ukrainian, Serbo-Croatian and Macedonian in general, with regional specializations for certain languages.
+ /// This is largely superseded by ISO 9 (GOST 7.79-2000(A)).
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/ISO_9#ISO/R_9 + ///
+        public sealed class IsoR9System : IRomanizationSystem
+        {
+            /// <inheritdoc />
+            public bool TransliterationSystem => true;
+
+            // System-Specific Constants
+            // Maps one Cyrillic letter (as a string) to its Latin replacement.
+            private static readonly Dictionary<string, string> RomanizationTable = new Dictionary<string, string>();
+
+            internal IsoR9System()
+            {
+                #region Romanization Chart
+
+                // Sourced from https://en.wikipedia.org/wiki/ISO_9#ISO/R_9
+
+                RomanizationTable["А"] = "A";
+                RomanizationTable["а"] = "a";
+                RomanizationTable["Б"] = "B";
+                RomanizationTable["б"] = "b";
+                RomanizationTable["В"] = "V";
+                RomanizationTable["в"] = "v";
+                RomanizationTable["Г"] = "G";
+                RomanizationTable["г"] = "g";
+                RomanizationTable["Д"] = "D";
+                RomanizationTable["д"] = "d";
+                RomanizationTable["Ѓ"] = "Ǵ";
+                RomanizationTable["ѓ"] = "ǵ";
+                RomanizationTable["Ђ"] = "Đ";
+                RomanizationTable["ђ"] = "đ";
+                RomanizationTable["Е"] = "E";
+                RomanizationTable["е"] = "e";
+                RomanizationTable["Ё"] = "Ë";
+                RomanizationTable["ё"] = "ë";
+                RomanizationTable["Є"] = "Je";
+                RomanizationTable["є"] = "je";
+                RomanizationTable["Ж"] = "Ž";
+                RomanizationTable["ж"] = "ž";
+                RomanizationTable["З"] = "Z";
+                RomanizationTable["з"] = "z";
+                RomanizationTable["Ѕ"] = "Dz";
+                RomanizationTable["ѕ"] = "dz";
+                RomanizationTable["И"] = "I";
+                RomanizationTable["и"] = "i";
+                RomanizationTable["\u0406"] = "I"; // Cyrillic І (U+0406); the key was previously a Latin "I"
+                RomanizationTable["і"] = "i";
+                RomanizationTable["Ї"] = "Ï";
+                RomanizationTable["ї"] = "ï";
+                RomanizationTable["Й"] = "J";
+                RomanizationTable["й"] = "j";
+                RomanizationTable["К"] = "K";
+                RomanizationTable["к"] = "k";
+                RomanizationTable["Л"] = "L";
+                RomanizationTable["л"] = "l";
+                RomanizationTable["Љ"] = "Lj";
+                RomanizationTable["љ"] = "lj";
+                RomanizationTable["М"] = "M";
+                RomanizationTable["м"] = "m";
+                RomanizationTable["Н"] = "N";
+                RomanizationTable["н"] = "n";
+                RomanizationTable["Њ"] = "Nj";
+                RomanizationTable["њ"] = "nj";
+                RomanizationTable["О"] = "O";
+                RomanizationTable["о"] = "o";
+                RomanizationTable["П"] = "P";
+                RomanizationTable["п"] = "p";
+                RomanizationTable["Р"] = "R";
+                RomanizationTable["р"] = "r";
+                RomanizationTable["С"] = "S";
+                RomanizationTable["с"] = "s";
+                RomanizationTable["Т"] = "T";
+                RomanizationTable["т"] = "t";
+                RomanizationTable["Ќ"] = "Ḱ";
+                RomanizationTable["ќ"] = "ḱ";
+                RomanizationTable["Ћ"] = "Ć";
+                RomanizationTable["ћ"] = "ć";
+                RomanizationTable["У"] = "U";
+                RomanizationTable["у"] = "u";
+                RomanizationTable["Ў"] = "Ŭ";
+                RomanizationTable["ў"] = "ŭ";
+                RomanizationTable["Ф"] = "F";
+                RomanizationTable["ф"] = "f";
+                RomanizationTable["Х"] = "H"; // RU specialization
+                RomanizationTable["х"] = "h"; // RU specialization
+                RomanizationTable["Ц"] = "C";
+                RomanizationTable["ц"] = "c";
+                RomanizationTable["Ч"] = "Č";
+                RomanizationTable["ч"] = "č";
+                RomanizationTable["Џ"] = "Dž";
+                RomanizationTable["џ"] = "dž";
+                RomanizationTable["Ш"] = "Š";
+                RomanizationTable["ш"] = "š";
+                RomanizationTable["Щ"] = "Šč";
+                RomanizationTable["щ"] = "šč";
+                RomanizationTable["Ъ"] = "ʺ";
+                RomanizationTable["ъ"] = "ʺ";
+                RomanizationTable["Ы"] = "Y";
+                RomanizationTable["ы"] = "y";
+                RomanizationTable["Ь"] = "ʹ";
+                RomanizationTable["ь"] = "ʹ";
+                RomanizationTable["Ѣ"] = "Ě";
+                RomanizationTable["ѣ"] = "ě";
+                RomanizationTable["Э"] = "Ė";
+                RomanizationTable["э"] = "ė";
+                RomanizationTable["Ю"] = "Ju";
+                RomanizationTable["ю"] = "ju";
+                RomanizationTable["Я"] = "Ja";
+                RomanizationTable["я"] = "ja";
+                RomanizationTable["’"] = "ʺ";
+                RomanizationTable["Ѫ"] = "ʺ̣";
+                RomanizationTable["ѫ"] = "ʺ̣";
+                RomanizationTable["Ѳ"] = "Ḟ";
+                RomanizationTable["ѳ"] = "ḟ";
+                RomanizationTable["Ѵ"] = "Ẏ";
+                RomanizationTable["ѵ"] = "ẏ";
+
+                #endregion
+            }
+
+            /// <summary>
+            /// Performs romanization according to ISO/R 9:1968 on the given text, with regional specializations applied for Russian.
+            /// </summary>
+            /// <param name="text">The text to romanize.</param>
+            /// <returns>A romanized version of the text, leaving unrecognized characters untouched.</returns>
+            public string Process(string text)
+                => text.ReplaceFromChart(RomanizationTable);
+        }
+    }
+}
diff --git a/Romanization/Languages/Russian/RoadSigns.cs b/Romanization/Languages/Russian/RoadSigns.cs
new file mode 100644
index 0000000..7600faa
--- /dev/null
+++ b/Romanization/Languages/Russian/RoadSigns.cs
@@ -0,0 +1,138 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text.RegularExpressions;
+using Romanization.LanguageAgnostic;
+
+// ReSharper disable CheckNamespace
+// ReSharper disable CommentTypo
+// ReSharper disable IdentifierTypo
+// ReSharper disable StringLiteralTypo
+// ReSharper disable InconsistentNaming
+
+namespace Romanization
+{
+    public static partial class Russian
+    {
+        ///
+        /// The general road sign romanization system of Russian.
+ /// This consists of Russian GOST R 52290-2004 (tables Г.4, Г.5) as well as GOST 10807-78 (tables 17, 18), historically.
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/Romanization_of_Russian#Street_and_road_signs + ///
+        // Lazily constructed singleton; the system's table is only built on first access to .Value.
+        public static readonly Lazy<RoadSignsSystem> RoadSigns = new Lazy<RoadSignsSystem>(() => new RoadSignsSystem());
+
+        ///
+        /// The general road sign romanization system of Russian.
+ /// This consists of Russian GOST R 52290-2004 (tables Г.4, Г.5) as well as GOST 10807-78 (tables 17, 18), historically.
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/Romanization_of_Russian#Street_and_road_signs + ///
+        public sealed class RoadSignsSystem : IRomanizationSystem
+        {
+            /// <inheritdoc />
+            public bool TransliterationSystem => true;
+
+            // System-Specific Constants
+            // Maps one Cyrillic letter (as a string) to its Latin replacement; applied after the context rules below.
+            private static readonly Dictionary<string, string> RomanizationTable = new Dictionary<string, string>();
+
+            private static CharSubCased YeVowelsSub;
+            private static CharSubCased YoVowelsSub;
+            private static CharSubCased YoExceptionsSub;
+
+            internal RoadSignsSystem()
+            {
+                // Е/е at the start of the text, at a word boundary, or after a vowel/ъ/ь -> "Ye"/"ye".
+                YeVowelsSub = new CharSubCased("(^|\\b|[ИиЫыЭэЕеАаЯяОоЁёУуЮюЪъЬь])Е",
+                    "(^|\\b|[ИиЫыЭэЕеАаЯяОоЁёУуЮюЪъЬь])е", "${1}Ye", "${1}ye");
+                // Ё/ё in the same positions -> "Yo"/"yo".
+                YoVowelsSub = new CharSubCased("(^|\\b|[ИиЫыЭэЕеАаЯяОоЁёУуЮюЪъЬь])Ё",
+                    "(^|\\b|[ИиЫыЭэЕеАаЯяОоЁёУуЮюЪъЬь])ё", "${1}Yo", "${1}yo");
+                // Ё/ё after Ч, Ш, Щ, Ж -> "E"/"e"; any Ё/ё still left afterwards falls through to the chart ("Ye"/"ye").
+                YoExceptionsSub = new CharSubCased("(^|\\b|[ЧчШшЩщЖж])Ё", "(^|\\b|[ЧчШшЩщЖж])ё", "${1}E", "${1}e");
+
+                #region Romanization Chart
+
+                // Sourced from https://en.wikipedia.org/wiki/Romanization_of_Russian
+
+                RomanizationTable["А"] = "A";
+                RomanizationTable["а"] = "a";
+                RomanizationTable["Б"] = "B";
+                RomanizationTable["б"] = "b";
+                RomanizationTable["В"] = "V";
+                RomanizationTable["в"] = "v";
+                RomanizationTable["Г"] = "G";
+                RomanizationTable["г"] = "g";
+                RomanizationTable["Д"] = "D";
+                RomanizationTable["д"] = "d";
+                RomanizationTable["Е"] = "E";
+                RomanizationTable["е"] = "e";
+                RomanizationTable["Ё"] = "Ye";
+                RomanizationTable["ё"] = "ye";
+                RomanizationTable["Ж"] = "Zh";
+                RomanizationTable["ж"] = "zh";
+                RomanizationTable["З"] = "Z";
+                RomanizationTable["з"] = "z";
+                RomanizationTable["И"] = "I";
+                RomanizationTable["и"] = "i";
+                RomanizationTable["Й"] = "Y";
+                RomanizationTable["й"] = "y";
+                RomanizationTable["К"] = "K";
+                RomanizationTable["к"] = "k";
+                RomanizationTable["Л"] = "L";
+                RomanizationTable["л"] = "l";
+                RomanizationTable["М"] = "M";
+                RomanizationTable["м"] = "m";
+                RomanizationTable["Н"] = "N";
+                RomanizationTable["н"] = "n";
+                RomanizationTable["О"] = "O";
+                RomanizationTable["о"] = "o";
+                RomanizationTable["П"] = "P";
+                RomanizationTable["п"] = "p";
+                RomanizationTable["Р"] = "R";
+                RomanizationTable["р"] = "r";
+                RomanizationTable["С"] = "S";
+                RomanizationTable["с"] = "s";
+                RomanizationTable["Т"] = "T";
+                RomanizationTable["т"] = "t";
+                RomanizationTable["У"] = "U";
+                RomanizationTable["у"] = "u";
+                RomanizationTable["Ф"] = "F";
+                RomanizationTable["ф"] = "f";
+                RomanizationTable["Х"] = "Kh";
+                RomanizationTable["х"] = "kh";
+                RomanizationTable["Ц"] = "Ts";
+                RomanizationTable["ц"] = "ts";
+                RomanizationTable["Ч"] = "Ch";
+                RomanizationTable["ч"] = "ch";
+                RomanizationTable["Ш"] = "Sh";
+                RomanizationTable["ш"] = "sh";
+                RomanizationTable["Щ"] = "Shch";
+                RomanizationTable["щ"] = "shch";
+                RomanizationTable["Ъ"] = "ʹ";
+                RomanizationTable["ъ"] = "ʹ";
+                RomanizationTable["Ы"] = "Y";
+                RomanizationTable["ы"] = "y";
+                RomanizationTable["Ь"] = "ʹ";
+                RomanizationTable["ь"] = "ʹ";
+                RomanizationTable["Э"] = "E";
+                RomanizationTable["э"] = "e";
+                RomanizationTable["Ю"] = "Yu";
+                RomanizationTable["ю"] = "yu";
+                RomanizationTable["Я"] = "Ya";
+                RomanizationTable["я"] = "ya";
+
+                #endregion
+            }
+
+            /// <summary>
+            /// Performs general road sign romanization on Russian text.
+            /// </summary>
+            /// <param name="text">The text to romanize.</param>
+            /// <returns>A romanized version of the text, leaving unrecognized characters untouched.</returns>
+            public string Process(string text)
+                => text
+                    // Render ye (Е) and yo (Ё) in different forms depending on what precedes them
+                    .ReplaceMany(YeVowelsSub, YoVowelsSub, YoExceptionsSub)
+                    // Do remaining romanization replacements
+                    .ReplaceFromChart(RomanizationTable);
+        }
+    }
+}
diff --git a/Romanization/Languages/Russian/Russian.cs b/Romanization/Languages/Russian/Russian.cs
index eebafc1..b6ff0c6 100644
--- a/Romanization/Languages/Russian/Russian.cs
+++ b/Romanization/Languages/Russian/Russian.cs
@@ -7,7 +7,15 @@
 namespace Romanization
 {
     ///
-    /// The class for romanizing Russian text.
+    /// The class for romanizing Russian text.
+ /// There are many systems here because there is no single, international, modern standard like there is for many other languages.
+ /// Note that dictionary/learning-material Russian can include acute diacritics for marking stress. These are ignored by all systems here, and
+ /// the diacritic will remain on the romanized version. + /// See https://en.wikipedia.org/wiki/Russian_alphabet#Diacritics for more info. ///
-    public static partial class Russian { }
+    public static partial class Russian
+    {
+        private const string RussianVowels = "ИиЫыЭэЕеАаЯяОоЁёУуЮю"; // Cyrillic vowel letters as upper/lowercase pairs; see https://en.wikipedia.org/wiki/Russian_phonology#Vowels
+        private const string RussianConsonants = "БбВвГгДдЖжЗзЙйКкЛлМмНнПпРрСсТтФфХхЦцЧчШшЩщ"; // Cyrillic consonant letters as upper/lowercase pairs; see https://russianlanguage.org.uk/russian-language-consonants/
+    }
 }
diff --git a/Romanization/Languages/Russian/Scholarly.cs b/Romanization/Languages/Russian/Scholarly.cs
new file mode 100644
index 0000000..613fca9
--- /dev/null
+++ b/Romanization/Languages/Russian/Scholarly.cs
@@ -0,0 +1,159 @@
+using Romanization.LanguageAgnostic;
+using System;
+using System.Collections.Generic;
+
+// ReSharper disable CheckNamespace
+// ReSharper disable CommentTypo
+// ReSharper disable IdentifierTypo
+// ReSharper disable StringLiteralTypo
+// ReSharper disable InconsistentNaming
+
+namespace Romanization
+{
+    public static partial class Russian
+    {
+        ///
+        /// The International Scholarly System of romanization for Russian.
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/Scientific_transliteration_of_Cyrillic + ///
+        // Lazily constructed singleton; the system's table is only built on first access to .Value.
+        public static readonly Lazy<ScholarlySystem> Scholarly = new Lazy<ScholarlySystem>(() => new ScholarlySystem());
+
+        ///
+        /// The International Scholarly System of romanization for Russian.
+ /// For more information, visit: + /// https://en.wikipedia.org/wiki/Scientific_transliteration_of_Cyrillic + ///
+        public sealed class ScholarlySystem : IRomanizationSystem
+        {
+            /// <inheritdoc />
+            public bool TransliterationSystem => true;
+
+            // System-Specific Constants
+            // Maps one Cyrillic letter (as a string) to its Latin replacement.
+            private static readonly Dictionary<string, string> RomanizationTable = new Dictionary<string, string>();
+
+            internal ScholarlySystem()
+            {
+                #region Romanization Chart
+
+                // Sourced from https://en.wikipedia.org/wiki/Scientific_transliteration_of_Cyrillic
+
+                // Main characters (2021)
+                // (І/і is not listed here: it is a pre-1918 letter and is mapped in the next section. The earlier
+                // entries in this section used a Latin "I" key, which was a no-op, and duplicated the "і" mapping.)
+                RomanizationTable["А"] = "A";
+                RomanizationTable["а"] = "a";
+                RomanizationTable["Б"] = "B";
+                RomanizationTable["б"] = "b";
+                RomanizationTable["В"] = "V";
+                RomanizationTable["в"] = "v";
+                RomanizationTable["Г"] = "G";
+                RomanizationTable["г"] = "g";
+                RomanizationTable["Д"] = "D";
+                RomanizationTable["д"] = "d";
+                RomanizationTable["Е"] = "E";
+                RomanizationTable["е"] = "e";
+                RomanizationTable["Ё"] = "Ë";
+                RomanizationTable["ё"] = "ë";
+                RomanizationTable["Ж"] = "Ž";
+                RomanizationTable["ж"] = "ž";
+                RomanizationTable["З"] = "Z";
+                RomanizationTable["з"] = "z";
+                RomanizationTable["И"] = "I";
+                RomanizationTable["и"] = "i";
+                RomanizationTable["Й"] = "J";
+                RomanizationTable["й"] = "j";
+                RomanizationTable["К"] = "K";
+                RomanizationTable["к"] = "k";
+                RomanizationTable["Л"] = "L";
+                RomanizationTable["л"] = "l";
+                RomanizationTable["М"] = "M";
+                RomanizationTable["м"] = "m";
+                RomanizationTable["Н"] = "N";
+                RomanizationTable["н"] = "n";
+                RomanizationTable["О"] = "O";
+                RomanizationTable["о"] = "o";
+                RomanizationTable["П"] = "P";
+                RomanizationTable["п"] = "p";
+                RomanizationTable["Р"] = "R";
+                RomanizationTable["р"] = "r";
+                RomanizationTable["С"] = "S";
+                RomanizationTable["с"] = "s";
+                RomanizationTable["Т"] = "T";
+                RomanizationTable["т"] = "t";
+                RomanizationTable["У"] = "U";
+                RomanizationTable["у"] = "u";
+                RomanizationTable["Ф"] = "F";
+                RomanizationTable["ф"] = "f";
+                RomanizationTable["Х"] = "X";
+                RomanizationTable["х"] = "x";
+                RomanizationTable["Ц"] = "C";
+                RomanizationTable["ц"] = "c";
+                RomanizationTable["Ч"] = "Č";
+                RomanizationTable["ч"] = "č";
+                RomanizationTable["Ш"] = "Š";
+                RomanizationTable["ш"] = "š";
+                RomanizationTable["Щ"] = "Šč";
+                RomanizationTable["щ"] = "šč";
+                RomanizationTable["Ъ"] = "ʺ";
+                RomanizationTable["ъ"] = "ʺ";
+                RomanizationTable["Ы"] = "Y";
+                RomanizationTable["ы"] = "y";
+                RomanizationTable["Ь"] = "ʹ";
+                RomanizationTable["ь"] = "ʹ";
+                RomanizationTable["Э"] = "È";
+                RomanizationTable["э"] = "è";
+                RomanizationTable["Ю"] = "Ju";
+                RomanizationTable["ю"] = "ju";
+                RomanizationTable["Я"] = "Ja";
+                RomanizationTable["я"] = "ja";
+
+                // Letters eliminated in the orthographic reform of 1918
+                RomanizationTable["І"] = "I";
+                RomanizationTable["і"] = "i";
+                RomanizationTable["Ѳ"] = "F";
+                RomanizationTable["ѳ"] = "f";
+                RomanizationTable["Ѣ"] = "Ě";
+                RomanizationTable["ѣ"] = "ě";
+                RomanizationTable["Ѵ"] = "I";
+                RomanizationTable["ѵ"] = "i";
+
+                // Pre-18th century letters
+                RomanizationTable["Є"] = "E";
+                RomanizationTable["є"] = "e";
+                RomanizationTable["Ѥ"] = "Je";
+                RomanizationTable["ѥ"] = "je";
+                RomanizationTable["Ѕ"] = "Dz";
+                RomanizationTable["ѕ"] = "dz";
+                RomanizationTable["Ꙋ"] = "U";
+                RomanizationTable["ꙋ"] = "u";
+                RomanizationTable["Ѡ"] = "Ô";
+                RomanizationTable["ѡ"] = "ô";
+                RomanizationTable["Ѿ"] = "Ôt";
+                RomanizationTable["ѿ"] = "ôt";
+                RomanizationTable["Ѫ"] = "Ǫ";
+                RomanizationTable["ѫ"] = "ǫ";
+                RomanizationTable["Ѧ"] = "Ę";
+                RomanizationTable["ѧ"] = "ę";
+                RomanizationTable["Ѭ"] = "Jǫ";
+                RomanizationTable["ѭ"] = "jǫ";
+                RomanizationTable["Ѩ"] = "Ję";
+                RomanizationTable["ѩ"] = "ję";
+                RomanizationTable["Ѯ"] = "Ks";
+                RomanizationTable["ѯ"] = "ks";
+                RomanizationTable["Ѱ"] = "Ps";
+                RomanizationTable["ѱ"] = "ps";
+
+                #endregion
+            }
+
+            /// <summary>
+            /// Performs romanization according to the International Scholarly System on the given text.
+            /// </summary>
+            /// <param name="text">The text to romanize.</param>
+            /// <returns>A romanized version of the text, leaving unrecognized characters untouched.</returns>
+            public string Process(string text)
+                => text.ReplaceFromChart(RomanizationTable);
+        }
+    }
+}
diff --git a/RomanizationTests/RussianTests/AlaLcSystemTests.cs b/RomanizationTests/RussianTests/AlaLcSystemTests.cs
deleted file mode 100644
index 8b62a4b..0000000
--- a/RomanizationTests/RussianTests/AlaLcSystemTests.cs
+++ /dev/null
@@ -1,24 +0,0 @@
-using Microsoft.VisualStudio.TestTools.UnitTesting;
-
-// ReSharper disable CheckNamespace
-// ReSharper disable IdentifierTypo
-// ReSharper disable StringLiteralTypo
-
-namespace Romanization.Tests.RussianTests
-{
-    [TestClass]
-    public class AlaLcSystemTests
-    {
-        [TestMethod]
-        public void ProcessTest()
-        {
-            Assert.AreEqual("", Russian.AlaLc.Value.Process(""));
-            Assert.AreEqual("Ėlektrogorsk", Russian.AlaLc.Value.Process("Электрогорск"));
-            Assert.AreEqual("Radioėlektronika", Russian.AlaLc.Value.Process("Радиоэлектроника"));
-            Assert.AreEqual("T͡Simli͡ansk", Russian.AlaLc.Value.Process("Цимлянск"));
-            Assert.AreEqual("Severobaĭkalʹsk", Russian.AlaLc.Value.Process("Северобайкальск"));
-            Assert.AreEqual("Ĭoshkar-Ola", Russian.AlaLc.Value.Process("Йошкар-Ола"));
-            Assert.AreEqual("Rossii͡a", Russian.AlaLc.Value.Process("Россия"));
-        }
-    }
-}
\ No newline at end of file
diff --git a/RomanizationTests/RussianTests/AlaLcTests.cs b/RomanizationTests/RussianTests/AlaLcTests.cs
new file mode 100644
index 0000000..4c2bab2
--- /dev/null
+++ b/RomanizationTests/RussianTests/AlaLcTests.cs
@@ -0,0 +1,36 @@
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+
+// ReSharper disable CheckNamespace
+// ReSharper disable IdentifierTypo
+// ReSharper disable StringLiteralTypo
+
+namespace Romanization.Tests.RussianTests
+{
+    [TestClass]
+    public class AlaLcTests
+    {
+        [TestMethod]
+        public void ProcessTest()
+        {
+            Assert.AreEqual("", Russian.AlaLc.Value.Process(""));
+            Assert.AreEqual("Ėlektrogorsk", Russian.AlaLc.Value.Process("Электрогорск"));
+            Assert.AreEqual("Radioėlektronika", Russian.AlaLc.Value.Process("Радиоэлектроника"));
+
Assert.AreEqual("T͡simli͡ansk", Russian.AlaLc.Value.Process("Цимлянск")); + Assert.AreEqual("Severobaĭkalʹsk", Russian.AlaLc.Value.Process("Северобайкальск")); + Assert.AreEqual("Ĭoshkar-Ola", Russian.AlaLc.Value.Process("Йошкар-Ола")); + Assert.AreEqual("Rossii͡a", Russian.AlaLc.Value.Process("Россия")); + Assert.AreEqual("Ygyatta", Russian.AlaLc.Value.Process("Ыгыатта")); + Assert.AreEqual("Kuyrkʺi͡avr", Russian.AlaLc.Value.Process("Куыркъявр")); + Assert.AreEqual("Ulan-Udė", Russian.AlaLc.Value.Process("Улан-Удэ")); + Assert.AreEqual("Tyaĭa", Russian.AlaLc.Value.Process("Тыайа")); + Assert.AreEqual("Chapaevsk", Russian.AlaLc.Value.Process("Чапаевск")); + Assert.AreEqual("Meĭerovka", Russian.AlaLc.Value.Process("Мейеровка")); + Assert.AreEqual("Barnaul", Russian.AlaLc.Value.Process("Барнаул")); + Assert.AreEqual("I͡akutsk", Russian.AlaLc.Value.Process("Якутск")); + Assert.AreEqual("Yttyk-Këlʹ", Russian.AlaLc.Value.Process("Ыттык-Кёль")); + Assert.AreEqual("Ufa", Russian.AlaLc.Value.Process("Уфа")); + Assert.AreEqual("rádostʹ", Russian.AlaLc.Value.Process("ра́дость")); + Assert.AreEqual("radostʹ t͡svetok", Russian.AlaLc.Value.Process("радость цветок")); + } + } +} diff --git a/RomanizationTests/RussianTests/BgnPcgnTests.cs b/RomanizationTests/RussianTests/BgnPcgnTests.cs new file mode 100644 index 0000000..222a1b0 --- /dev/null +++ b/RomanizationTests/RussianTests/BgnPcgnTests.cs @@ -0,0 +1,36 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; + +// ReSharper disable CheckNamespace +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo + +namespace Romanization.Tests.RussianTests +{ + [TestClass] + public class BgnPcgnTests + { + [TestMethod] + public void ProcessTest() + { + Assert.AreEqual("", Russian.BgnPcgn.Value.Process("")); + Assert.AreEqual("Elektrogorsk", Russian.BgnPcgn.Value.Process("Электрогорск")); + Assert.AreEqual("Radioelektronika", Russian.BgnPcgn.Value.Process("Радиоэлектроника")); + Assert.AreEqual("Tsimlyansk", 
Russian.BgnPcgn.Value.Process("Цимлянск")); + Assert.AreEqual("Severobaykalʹsk", Russian.BgnPcgn.Value.Process("Северобайкальск")); + Assert.AreEqual("Yoshkar-Ola", Russian.BgnPcgn.Value.Process("Йошкар-Ола")); + Assert.AreEqual("Rossiya", Russian.BgnPcgn.Value.Process("Россия")); + Assert.AreEqual("Ygy·atta", Russian.BgnPcgn.Value.Process("Ыгыатта")); + Assert.AreEqual("Ku·yrkʺyavr", Russian.BgnPcgn.Value.Process("Куыркъявр")); + Assert.AreEqual("Ulan-Ud·e", Russian.BgnPcgn.Value.Process("Улан-Удэ")); + Assert.AreEqual("Ty·ay·a", Russian.BgnPcgn.Value.Process("Тыайа")); + Assert.AreEqual("Chapayevsk", Russian.BgnPcgn.Value.Process("Чапаевск")); + Assert.AreEqual("Meyyerovka", Russian.BgnPcgn.Value.Process("Мейеровка")); + Assert.AreEqual("Barnaul", Russian.BgnPcgn.Value.Process("Барнаул")); + Assert.AreEqual("Yakut·sk", Russian.BgnPcgn.Value.Process("Якутск")); + Assert.AreEqual("Yttyk-Këlʹ", Russian.BgnPcgn.Value.Process("Ыттык-Кёль")); + Assert.AreEqual("Ufa", Russian.BgnPcgn.Value.Process("Уфа")); + Assert.AreEqual("rádostʹ", Russian.BgnPcgn.Value.Process("ра́дость")); + Assert.AreEqual("radostʹ tsvetok", Russian.BgnPcgn.Value.Process("радость цветок")); + } + } +} diff --git a/RomanizationTests/RussianTests/Bs29791958Tests.cs b/RomanizationTests/RussianTests/Bs29791958Tests.cs new file mode 100644 index 0000000..7ccaba6 --- /dev/null +++ b/RomanizationTests/RussianTests/Bs29791958Tests.cs @@ -0,0 +1,36 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; + +// ReSharper disable CheckNamespace +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo + +namespace Romanization.Tests.RussianTests +{ + [TestClass] + public class Bs29791958Tests + { + [TestMethod] + public void ProcessTest() + { + Assert.AreEqual("", Russian.Bs29791958.Value.Process("")); + Assert.AreEqual("Élektrogorsk", Russian.Bs29791958.Value.Process("Электрогорск")); + Assert.AreEqual("Radioélektronika", Russian.Bs29791958.Value.Process("Радиоэлектроника")); + 
Assert.AreEqual("Tsimlyansk", Russian.Bs29791958.Value.Process("Цимлянск")); + Assert.AreEqual("Severobaĭkalʹsk", Russian.Bs29791958.Value.Process("Северобайкальск")); + Assert.AreEqual("Ĭoshkar-Ola", Russian.Bs29791958.Value.Process("Йошкар-Ола")); + Assert.AreEqual("Rossiya", Russian.Bs29791958.Value.Process("Россия")); + Assert.AreEqual("Ȳgȳatta", Russian.Bs29791958.Value.Process("Ыгыатта")); + Assert.AreEqual("Kuȳrkʺyavr", Russian.Bs29791958.Value.Process("Куыркъявр")); + Assert.AreEqual("Ulan-Udé", Russian.Bs29791958.Value.Process("Улан-Удэ")); + Assert.AreEqual("Tȳaĭa", Russian.Bs29791958.Value.Process("Тыайа")); + Assert.AreEqual("Chapaevsk", Russian.Bs29791958.Value.Process("Чапаевск")); + Assert.AreEqual("Meĭerovka", Russian.Bs29791958.Value.Process("Мейеровка")); + Assert.AreEqual("Barnaul", Russian.Bs29791958.Value.Process("Барнаул")); + Assert.AreEqual("Yakut-sk", Russian.Bs29791958.Value.Process("Якутск")); + Assert.AreEqual("Ȳttȳk-Këlʹ", Russian.Bs29791958.Value.Process("Ыттык-Кёль")); + Assert.AreEqual("Ufa", Russian.Bs29791958.Value.Process("Уфа")); + Assert.AreEqual("rádostʹ", Russian.Bs29791958.Value.Process("ра́дость")); + Assert.AreEqual("radostʹ tsvetok", Russian.Bs29791958.Value.Process("радость цветок")); + } + } +} diff --git a/RomanizationTests/RussianTests/Gost16876711Tests.cs b/RomanizationTests/RussianTests/Gost16876711Tests.cs new file mode 100644 index 0000000..70dfcbc --- /dev/null +++ b/RomanizationTests/RussianTests/Gost16876711Tests.cs @@ -0,0 +1,36 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; + +// ReSharper disable CheckNamespace +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo + +namespace Romanization.Tests.RussianTests +{ + [TestClass] + public class Gost16876711Tests + { + [TestMethod] + public void ProcessTest() + { + Assert.AreEqual("", Russian.Gost16876711.Value.Process("")); + Assert.AreEqual("Ėlektrogorsk", Russian.Gost16876711.Value.Process("Электрогорск")); + 
Assert.AreEqual("Radioėlektronika", Russian.Gost16876711.Value.Process("Радиоэлектроника")); + Assert.AreEqual("Cimlânsk", Russian.Gost16876711.Value.Process("Цимлянск")); + Assert.AreEqual("Severobajkalʹsk", Russian.Gost16876711.Value.Process("Северобайкальск")); + Assert.AreEqual("Joškar-Ola", Russian.Gost16876711.Value.Process("Йошкар-Ола")); + Assert.AreEqual("Rossiâ", Russian.Gost16876711.Value.Process("Россия")); + Assert.AreEqual("Ygyatta", Russian.Gost16876711.Value.Process("Ыгыатта")); + Assert.AreEqual("Kuyrkʺâvr", Russian.Gost16876711.Value.Process("Куыркъявр")); + Assert.AreEqual("Ulan-Udė", Russian.Gost16876711.Value.Process("Улан-Удэ")); + Assert.AreEqual("Tyaja", Russian.Gost16876711.Value.Process("Тыайа")); + Assert.AreEqual("Čapaevsk", Russian.Gost16876711.Value.Process("Чапаевск")); + Assert.AreEqual("Mejerovka", Russian.Gost16876711.Value.Process("Мейеровка")); + Assert.AreEqual("Barnaul", Russian.Gost16876711.Value.Process("Барнаул")); + Assert.AreEqual("Âkutsk", Russian.Gost16876711.Value.Process("Якутск")); + Assert.AreEqual("Yttyk-Këlʹ", Russian.Gost16876711.Value.Process("Ыттык-Кёль")); + Assert.AreEqual("Ufa", Russian.Gost16876711.Value.Process("Уфа")); + Assert.AreEqual("rádostʹ", Russian.Gost16876711.Value.Process("ра́дость")); + Assert.AreEqual("radostʹ cvetok", Russian.Gost16876711.Value.Process("радость цветок")); + } + } +} diff --git a/RomanizationTests/RussianTests/Gost16876712Tests.cs b/RomanizationTests/RussianTests/Gost16876712Tests.cs new file mode 100644 index 0000000..0dda526 --- /dev/null +++ b/RomanizationTests/RussianTests/Gost16876712Tests.cs @@ -0,0 +1,36 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; + +// ReSharper disable CheckNamespace +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo + +namespace Romanization.Tests.RussianTests +{ + [TestClass] + public class Gost16876712Tests + { + [TestMethod] + public void ProcessTest() + { + Assert.AreEqual("", 
Russian.Gost16876712.Value.Process("")); + Assert.AreEqual("Ehlektrogorsk", Russian.Gost16876712.Value.Process("Электрогорск")); + Assert.AreEqual("Radioehlektronika", Russian.Gost16876712.Value.Process("Радиоэлектроника")); + Assert.AreEqual("Cimljansk", Russian.Gost16876712.Value.Process("Цимлянск")); + Assert.AreEqual("Severobajkalʹsk", Russian.Gost16876712.Value.Process("Северобайкальск")); + Assert.AreEqual("Joshkar-Ola", Russian.Gost16876712.Value.Process("Йошкар-Ола")); + Assert.AreEqual("Rossija", Russian.Gost16876712.Value.Process("Россия")); + Assert.AreEqual("Ygyatta", Russian.Gost16876712.Value.Process("Ыгыатта")); + Assert.AreEqual("Kuyrkʺjavr", Russian.Gost16876712.Value.Process("Куыркъявр")); + Assert.AreEqual("Ulan-Udeh", Russian.Gost16876712.Value.Process("Улан-Удэ")); + Assert.AreEqual("Tyaja", Russian.Gost16876712.Value.Process("Тыайа")); + Assert.AreEqual("Chapaevsk", Russian.Gost16876712.Value.Process("Чапаевск")); + Assert.AreEqual("Mejerovka", Russian.Gost16876712.Value.Process("Мейеровка")); + Assert.AreEqual("Barnaul", Russian.Gost16876712.Value.Process("Барнаул")); + Assert.AreEqual("Jakutsk", Russian.Gost16876712.Value.Process("Якутск")); + Assert.AreEqual("Yttyk-Kjolʹ", Russian.Gost16876712.Value.Process("Ыттык-Кёль")); + Assert.AreEqual("Ufa", Russian.Gost16876712.Value.Process("Уфа")); + Assert.AreEqual("rádostʹ", Russian.Gost16876712.Value.Process("ра́дость")); + Assert.AreEqual("radostʹ cvetok", Russian.Gost16876712.Value.Process("радость цветок")); + } + } +} diff --git a/RomanizationTests/RussianTests/Gost7792000ATests.cs b/RomanizationTests/RussianTests/Gost7792000ATests.cs new file mode 100644 index 0000000..96d2b7e --- /dev/null +++ b/RomanizationTests/RussianTests/Gost7792000ATests.cs @@ -0,0 +1,36 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; + +// ReSharper disable CheckNamespace +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo + +namespace Romanization.Tests.RussianTests +{ + [TestClass] 
+ public class Gost7792000ATests + { + [TestMethod] + public void ProcessTest() + { + Assert.AreEqual("", Russian.Gost7792000A.Value.Process("")); + Assert.AreEqual("Èlektrogorsk", Russian.Gost7792000A.Value.Process("Электрогорск")); + Assert.AreEqual("Radioèlektronika", Russian.Gost7792000A.Value.Process("Радиоэлектроника")); + Assert.AreEqual("Cimlânsk", Russian.Gost7792000A.Value.Process("Цимлянск")); + Assert.AreEqual("Severobajkalʹsk", Russian.Gost7792000A.Value.Process("Северобайкальск")); + Assert.AreEqual("Joškar-Ola", Russian.Gost7792000A.Value.Process("Йошкар-Ола")); + Assert.AreEqual("Rossiâ", Russian.Gost7792000A.Value.Process("Россия")); + Assert.AreEqual("Ygyatta", Russian.Gost7792000A.Value.Process("Ыгыатта")); + Assert.AreEqual("Kuyrkʺâvr", Russian.Gost7792000A.Value.Process("Куыркъявр")); + Assert.AreEqual("Ulan-Udè", Russian.Gost7792000A.Value.Process("Улан-Удэ")); + Assert.AreEqual("Tyaja", Russian.Gost7792000A.Value.Process("Тыайа")); + Assert.AreEqual("Čapaevsk", Russian.Gost7792000A.Value.Process("Чапаевск")); + Assert.AreEqual("Mejerovka", Russian.Gost7792000A.Value.Process("Мейеровка")); + Assert.AreEqual("Barnaul", Russian.Gost7792000A.Value.Process("Барнаул")); + Assert.AreEqual("Âkutsk", Russian.Gost7792000A.Value.Process("Якутск")); + Assert.AreEqual("Yttyk-Këlʹ", Russian.Gost7792000A.Value.Process("Ыттык-Кёль")); + Assert.AreEqual("Ufa", Russian.Gost7792000A.Value.Process("Уфа")); + Assert.AreEqual("rádostʹ", Russian.Gost7792000A.Value.Process("ра́дость")); + Assert.AreEqual("radostʹ cvetok", Russian.Gost7792000A.Value.Process("радость цветок")); + } + } +} diff --git a/RomanizationTests/RussianTests/Gost7792000BTests.cs b/RomanizationTests/RussianTests/Gost7792000BTests.cs new file mode 100644 index 0000000..6c0bfa3 --- /dev/null +++ b/RomanizationTests/RussianTests/Gost7792000BTests.cs @@ -0,0 +1,36 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; + +// ReSharper disable CheckNamespace +// ReSharper disable IdentifierTypo +// 
ReSharper disable StringLiteralTypo + +namespace Romanization.Tests.RussianTests +{ + [TestClass] + public class Gost7792000BTests + { + [TestMethod] + public void ProcessTest() + { + Assert.AreEqual("", Russian.Gost7792000B.Value.Process("")); + Assert.AreEqual("E`lektrogorsk", Russian.Gost7792000B.Value.Process("Электрогорск")); + Assert.AreEqual("Radioe`lektronika", Russian.Gost7792000B.Value.Process("Радиоэлектроника")); + Assert.AreEqual("Cimlyansk", Russian.Gost7792000B.Value.Process("Цимлянск")); + Assert.AreEqual("Severobajkalʹsk", Russian.Gost7792000B.Value.Process("Северобайкальск")); + Assert.AreEqual("Joshkar-Ola", Russian.Gost7792000B.Value.Process("Йошкар-Ола")); + Assert.AreEqual("Rossiya", Russian.Gost7792000B.Value.Process("Россия")); + Assert.AreEqual("Ygyatta", Russian.Gost7792000B.Value.Process("Ыгыатта")); + Assert.AreEqual("Kuyrkʺyavr", Russian.Gost7792000B.Value.Process("Куыркъявр")); + Assert.AreEqual("Ulan-Ude`", Russian.Gost7792000B.Value.Process("Улан-Удэ")); + Assert.AreEqual("Tyaja", Russian.Gost7792000B.Value.Process("Тыайа")); + Assert.AreEqual("Chapaevsk", Russian.Gost7792000B.Value.Process("Чапаевск")); + Assert.AreEqual("Mejerovka", Russian.Gost7792000B.Value.Process("Мейеровка")); + Assert.AreEqual("Barnaul", Russian.Gost7792000B.Value.Process("Барнаул")); + Assert.AreEqual("Yakutsk", Russian.Gost7792000B.Value.Process("Якутск")); + Assert.AreEqual("Yttyk-Kyolʹ", Russian.Gost7792000B.Value.Process("Ыттык-Кёль")); + Assert.AreEqual("Ufa", Russian.Gost7792000B.Value.Process("Уфа")); + Assert.AreEqual("rádostʹ", Russian.Gost7792000B.Value.Process("ра́дость")); + Assert.AreEqual("radostʹ czvetok", Russian.Gost7792000B.Value.Process("радость цветок")); + } + } +} diff --git a/RomanizationTests/RussianTests/Icao9303Tests.cs b/RomanizationTests/RussianTests/Icao9303Tests.cs new file mode 100644 index 0000000..16bbdb7 --- /dev/null +++ b/RomanizationTests/RussianTests/Icao9303Tests.cs @@ -0,0 +1,36 @@ +using 
Microsoft.VisualStudio.TestTools.UnitTesting; + +// ReSharper disable CheckNamespace +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo + +namespace Romanization.Tests.RussianTests +{ + [TestClass] + public class Icao9303Tests + { + [TestMethod] + public void ProcessTest() + { + Assert.AreEqual("", Russian.Icao9303.Value.Process("")); + Assert.AreEqual("Elektrogorsk", Russian.Icao9303.Value.Process("Электрогорск")); + Assert.AreEqual("Radioelektronika", Russian.Icao9303.Value.Process("Радиоэлектроника")); + Assert.AreEqual("Tsimliansk", Russian.Icao9303.Value.Process("Цимлянск")); + Assert.AreEqual("Severobaikalsk", Russian.Icao9303.Value.Process("Северобайкальск")); + Assert.AreEqual("Ioshkar-Ola", Russian.Icao9303.Value.Process("Йошкар-Ола")); + Assert.AreEqual("Rossiia", Russian.Icao9303.Value.Process("Россия")); + Assert.AreEqual("Ygyatta", Russian.Icao9303.Value.Process("Ыгыатта")); + Assert.AreEqual("Kuyrkieiavr", Russian.Icao9303.Value.Process("Куыркъявр")); + Assert.AreEqual("Ulan-Ude", Russian.Icao9303.Value.Process("Улан-Удэ")); + Assert.AreEqual("Tyaia", Russian.Icao9303.Value.Process("Тыайа")); + Assert.AreEqual("Chapaevsk", Russian.Icao9303.Value.Process("Чапаевск")); + Assert.AreEqual("Meierovka", Russian.Icao9303.Value.Process("Мейеровка")); + Assert.AreEqual("Barnaul", Russian.Icao9303.Value.Process("Барнаул")); + Assert.AreEqual("Iakutsk", Russian.Icao9303.Value.Process("Якутск")); + Assert.AreEqual("Yttyk-Kel", Russian.Icao9303.Value.Process("Ыттык-Кёль")); + Assert.AreEqual("Ufa", Russian.Icao9303.Value.Process("Уфа")); + Assert.AreEqual("rádost", Russian.Icao9303.Value.Process("ра́дость")); + Assert.AreEqual("radost tsvetok", Russian.Icao9303.Value.Process("радость цветок")); + } + } +} diff --git a/RomanizationTests/RussianTests/IsoR9Tests.cs b/RomanizationTests/RussianTests/IsoR9Tests.cs new file mode 100644 index 0000000..03444e7 --- /dev/null +++ b/RomanizationTests/RussianTests/IsoR9Tests.cs @@ -0,0 +1,36 @@ +using 
Microsoft.VisualStudio.TestTools.UnitTesting; + +// ReSharper disable CheckNamespace +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo + +namespace Romanization.Tests.RussianTests +{ + [TestClass] + public class IsoR9Tests + { + [TestMethod] + public void ProcessTest() + { + Assert.AreEqual("", Russian.IsoR9.Value.Process("")); + Assert.AreEqual("Ėlektrogorsk", Russian.IsoR9.Value.Process("Электрогорск")); + Assert.AreEqual("Radioėlektronika", Russian.IsoR9.Value.Process("Радиоэлектроника")); + Assert.AreEqual("Cimljansk", Russian.IsoR9.Value.Process("Цимлянск")); + Assert.AreEqual("Severobajkalʹsk", Russian.IsoR9.Value.Process("Северобайкальск")); + Assert.AreEqual("Joškar-Ola", Russian.IsoR9.Value.Process("Йошкар-Ола")); + Assert.AreEqual("Rossija", Russian.IsoR9.Value.Process("Россия")); + Assert.AreEqual("Ygyatta", Russian.IsoR9.Value.Process("Ыгыатта")); + Assert.AreEqual("Kuyrkʺjavr", Russian.IsoR9.Value.Process("Куыркъявр")); + Assert.AreEqual("Ulan-Udė", Russian.IsoR9.Value.Process("Улан-Удэ")); + Assert.AreEqual("Tyaja", Russian.IsoR9.Value.Process("Тыайа")); + Assert.AreEqual("Čapaevsk", Russian.IsoR9.Value.Process("Чапаевск")); + Assert.AreEqual("Mejerovka", Russian.IsoR9.Value.Process("Мейеровка")); + Assert.AreEqual("Barnaul", Russian.IsoR9.Value.Process("Барнаул")); + Assert.AreEqual("Jakutsk", Russian.IsoR9.Value.Process("Якутск")); + Assert.AreEqual("Yttyk-Këlʹ", Russian.IsoR9.Value.Process("Ыттык-Кёль")); + Assert.AreEqual("Ufa", Russian.IsoR9.Value.Process("Уфа")); + Assert.AreEqual("rádostʹ", Russian.IsoR9.Value.Process("ра́дость")); + Assert.AreEqual("radostʹ cvetok", Russian.IsoR9.Value.Process("радость цветок")); + } + } +} diff --git a/RomanizationTests/RussianTests/RoadSignsTests.cs b/RomanizationTests/RussianTests/RoadSignsTests.cs new file mode 100644 index 0000000..6fc0372 --- /dev/null +++ b/RomanizationTests/RussianTests/RoadSignsTests.cs @@ -0,0 +1,36 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; + 
+// ReSharper disable CheckNamespace +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo + +namespace Romanization.Tests.RussianTests +{ + [TestClass] + public class RoadSignsTests + { + [TestMethod] + public void ProcessTest() + { + Assert.AreEqual("", Russian.RoadSigns.Value.Process("")); + Assert.AreEqual("Elektrogorsk", Russian.RoadSigns.Value.Process("Электрогорск")); + Assert.AreEqual("Radioelektronika", Russian.RoadSigns.Value.Process("Радиоэлектроника")); + Assert.AreEqual("Tsimlyansk", Russian.RoadSigns.Value.Process("Цимлянск")); + Assert.AreEqual("Severobaykalʹsk", Russian.RoadSigns.Value.Process("Северобайкальск")); + Assert.AreEqual("Yoshkar-Ola", Russian.RoadSigns.Value.Process("Йошкар-Ола")); + Assert.AreEqual("Rossiya", Russian.RoadSigns.Value.Process("Россия")); + Assert.AreEqual("Ygyatta", Russian.RoadSigns.Value.Process("Ыгыатта")); + Assert.AreEqual("Kuyrkʹyavr", Russian.RoadSigns.Value.Process("Куыркъявр")); + Assert.AreEqual("Ulan-Ude", Russian.RoadSigns.Value.Process("Улан-Удэ")); + Assert.AreEqual("Tyaya", Russian.RoadSigns.Value.Process("Тыайа")); + Assert.AreEqual("Chapayevsk", Russian.RoadSigns.Value.Process("Чапаевск")); + Assert.AreEqual("Meyerovka", Russian.RoadSigns.Value.Process("Мейеровка")); + Assert.AreEqual("Barnaul", Russian.RoadSigns.Value.Process("Барнаул")); + Assert.AreEqual("Yakutsk", Russian.RoadSigns.Value.Process("Якутск")); + Assert.AreEqual("Yttyk-Kyelʹ", Russian.RoadSigns.Value.Process("Ыттык-Кёль")); + Assert.AreEqual("Ufa", Russian.RoadSigns.Value.Process("Уфа")); + Assert.AreEqual("rádostʹ", Russian.RoadSigns.Value.Process("ра́дость")); + Assert.AreEqual("radostʹ tsvetok", Russian.RoadSigns.Value.Process("радость цветок")); + } + } +} diff --git a/RomanizationTests/RussianTests/ScholarlyTests.cs b/RomanizationTests/RussianTests/ScholarlyTests.cs new file mode 100644 index 0000000..5471a33 --- /dev/null +++ b/RomanizationTests/RussianTests/ScholarlyTests.cs @@ -0,0 +1,36 @@ +using 
Microsoft.VisualStudio.TestTools.UnitTesting; + +// ReSharper disable CheckNamespace +// ReSharper disable IdentifierTypo +// ReSharper disable StringLiteralTypo + +namespace Romanization.Tests.RussianTests +{ + [TestClass] + public class ScholarlyTests + { + [TestMethod] + public void ProcessTest() + { + Assert.AreEqual("", Russian.Scholarly.Value.Process("")); + Assert.AreEqual("Èlektrogorsk", Russian.Scholarly.Value.Process("Электрогорск")); + Assert.AreEqual("Radioèlektronika", Russian.Scholarly.Value.Process("Радиоэлектроника")); + Assert.AreEqual("Cimljansk", Russian.Scholarly.Value.Process("Цимлянск")); + Assert.AreEqual("Severobajkalʹsk", Russian.Scholarly.Value.Process("Северобайкальск")); + Assert.AreEqual("Joškar-Ola", Russian.Scholarly.Value.Process("Йошкар-Ола")); + Assert.AreEqual("Rossija", Russian.Scholarly.Value.Process("Россия")); + Assert.AreEqual("Ygyatta", Russian.Scholarly.Value.Process("Ыгыатта")); + Assert.AreEqual("Kuyrkʺjavr", Russian.Scholarly.Value.Process("Куыркъявр")); + Assert.AreEqual("Ulan-Udè", Russian.Scholarly.Value.Process("Улан-Удэ")); + Assert.AreEqual("Tyaja", Russian.Scholarly.Value.Process("Тыайа")); + Assert.AreEqual("Čapaevsk", Russian.Scholarly.Value.Process("Чапаевск")); + Assert.AreEqual("Mejerovka", Russian.Scholarly.Value.Process("Мейеровка")); + Assert.AreEqual("Barnaul", Russian.Scholarly.Value.Process("Барнаул")); + Assert.AreEqual("Jakutsk", Russian.Scholarly.Value.Process("Якутск")); + Assert.AreEqual("Yttyk-Këlʹ", Russian.Scholarly.Value.Process("Ыттык-Кёль")); + Assert.AreEqual("Ufa", Russian.Scholarly.Value.Process("Уфа")); + Assert.AreEqual("rádostʹ", Russian.Scholarly.Value.Process("ра́дость")); + Assert.AreEqual("radostʹ cvetok", Russian.Scholarly.Value.Process("радость цветок")); + } + } +}