Skip to content

Commit

Permalink
English X-SAMPA fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
lottev1991 committed Oct 17, 2023
1 parent 0eac6f4 commit 2565a23
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 13 deletions.
7 changes: 7 additions & 0 deletions OpenUtau.Plugin.Builtin/Data/en-xsampa.template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,13 @@ symbols:
- {symbol: VI, type: vowel}
- {symbol: VU, type: vowel}
- {symbol: '@U', type: vowel}
- {symbol: ai, type: vowel}
- {symbol: ei, type: vowel}
- {symbol: Oi, type: vowel}
- {symbol: au, type: vowel}
- {symbol: ou, type: vowel}
- {symbol: Ou, type: vowel}
- {symbol: '@u', type: vowel}
- {symbol: 'i:', type: vowel}
- {symbol: 'u:', type: vowel}
- {symbol: 'O:', type: vowel}
Expand Down
26 changes: 13 additions & 13 deletions OpenUtau.Plugin.Builtin/EnXSampaPhonemizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public class EnXSampaPhonemizer : SyllableBasedPhonemizer {
/// Due to the flexibility of X-SAMPA, it was easy to add custom sounds. More suggestions for these are always welcome.
///</summary>

private readonly string[] vowels = "a,A,@,{,V,O,aU,aI,E,3,eI,I,i,oU,OI,U,u,Q,Ol,Ql,aUn,e@,eN,IN,e,o,Ar,Qr,Er,Ir,Or,Ur,ir,ur,aIr,aUr,A@,Q@,E@,I@,O@,U@,i@,u@,aI@,aU@,@r,@l,@m,@n,@N,1,e@m,e@n,y,I\\,M,U\\,Y,@\\,@`,3`,A`,Q`,E`,I`,O`,U`,i`,u`,aI`,aU`,},2,3\\,6,7,8,9,&,{~,I~,aU~,VI,VU,@U,i:,u:,O:,e@0,E~,e~,3r,ar,or,{l,Al,al,El,Il,il,ul,Ul,mm,nn,ll,NN".Split(',');
private readonly string[] vowels = "a,A,@,{,V,O,aU,aI,E,3,eI,I,i,oU,OI,U,u,Q,Ol,Ql,aUn,e@,eN,IN,e,o,Ar,Qr,Er,Ir,Or,Ur,ir,ur,aIr,aUr,A@,Q@,E@,I@,O@,U@,i@,u@,aI@,aU@,@r,@l,@m,@n,@N,1,e@m,e@n,y,I\\,M,U\\,Y,@\\,@`,3`,A`,Q`,E`,I`,O`,U`,i`,u`,aI`,aU`,},2,3\\,6,7,8,9,&,{~,I~,aU~,VI,VU,@U,ai,ei,Oi,au,ou,Ou,@u,i:,u:,O:,e@0,E~,e~,3r,ar,or,{l,Al,al,El,Il,il,ul,Ul,mm,nn,ll,NN".Split(',');
private readonly string[] consonants = "b,tS,d,D,4,f,g,h,dZ,k,l,m,n,N,p,r,s,S,t,T,v,w,W,j,z,Z,t_},・,_".Split(',');
private readonly string[] affricates = "tS,dZ".Split(',');
private readonly string[] shortConsonants = "4".Split(",");
Expand All @@ -35,7 +35,7 @@ public class EnXSampaPhonemizer : SyllableBasedPhonemizer {
.ToDictionary(parts => parts[0], parts => parts[1]);

// For banks aliased with VOCALOID-style phonemes
private readonly Dictionary<string, string> vocaSampa = "A=Q;E=e;i=i:;u=u:;O=O:;3=@r;oU=@U".Split(';')
private readonly Dictionary<string, string> vocaSampa = "aIr=Q@;eIr-e@;aUr=Q@;oUr=Or;A=Q;E=e;i=i:;u=u:;O=O:;3=@r;oU=@U;Ar=Q@;Qr=Q@;Er=e@;er=e@;Ir=I@;ir=I@;i:r=I@;Or=O@;O:r=O@;Ur=U@;ur=U@;u:r=U@".Split(';')
.Select(entry => entry.Split('='))
.Where(parts => parts.Length == 2)
.Where(parts => parts[0] != parts[1])
Expand Down Expand Up @@ -143,12 +143,12 @@ protected override string[] GetSymbols(Note note) {
}
List<string> modified = new List<string>();
// Splits diphthongs and affricates if not present in the bank
string[] diphthongs = new[] { "aI", "eI", "OI", "aU", "oU", "VI", "VU", "@U" };
string[] diphthongs = new[] { "aI", "eI", "OI", "aU", "oU", "VI", "VU", "@U", "ai", "ei", "Oi", "au", "ou", "Ou", "@u", };
string[] affricates = new[] { "dZ", "tS" };
foreach (string s in original) {
if (diphthongs.Contains(s) && !HasOto($"{s} b", note.tone)) {
if (diphthongs.Contains(s) && !HasOto($"- {s}", note.tone) && !HasOto(s, note.tone) && !HasOto(ValidateAlias($"- {s}"), note.tone) && !HasOto(ValidateAlias(s), note.tone)) {
modified.AddRange(new string[] { s[0].ToString(), s[1] + '^'.ToString() });
} else if (affricates.Contains(s) && !HasOto($"i {s}", note.tone) && !HasOto($"i: {s}", note.tone)) {
} else if (affricates.Contains(s) && !HasOto($"{s}A", note.tone) && !HasOto($"{s} A", note.tone) && !HasOto($"{s}Q", note.tone) && !HasOto($"{s} Q", note.tone)) {
modified.AddRange(new string[] { s[0].ToString(), s[1].ToString() });
} else {
modified.Add(s);
Expand All @@ -169,27 +169,27 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
var rv = $"- {v}";

// Switch between phonetic systems, depending on certain aliases in the bank
if (HasOto($"i: b", syllable.tone) || !HasOto($"3 b", syllable.tone)) {
if (HasOto($"- i:", syllable.vowelTone) || HasOto($"i:", syllable.vowelTone) || (!HasOto($"- 3", syllable.vowelTone) && !HasOto($"3", syllable.vowelTone))) {
isVocaSampa = true;
}

if (!HasOto($"V b", syllable.vowelTone)) {
if (!HasOto($"- V", syllable.vowelTone) && !HasOto($"V", syllable.vowelTone)) {
isSimpleDelta = true;
}

if (!HasOto($"I b", syllable.vowelTone)) {
if ((!HasOto($"- I", syllable.vowelTone) && !HasOto($"I", syllable.vowelTone)) || (!HasOto($"- U", syllable.vowelTone) && !HasOto($"U", syllable.vowelTone))) {
isMiniDelta = true;
}

if (HasOto("", syllable.vowelTone)) {
if (HasOto("あ", syllable.vowelTone) || HasOto("- あ", syllable.vowelTone)) {
isEnPlusJa = true;
}

if (HasOto($"{prevV} r\\", syllable.tone)) {
isTrueXSampa = true;
}

if (!HasOto($"3 b", syllable.tone) && !HasOto($"@` b", syllable.tone)) {
if ((!HasOto($"- 3", syllable.vowelTone) && !HasOto($"3", syllable.vowelTone)) || (!HasOto($"- @`", syllable.vowelTone) && !HasOto($"@`", syllable.vowelTone))) {
isSalemList = true;
}

Expand Down Expand Up @@ -222,7 +222,7 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
var cv = $"{cc[0]}{v}";
if (HasOto(rcv, syllable.vowelTone) || HasOto(ValidateAlias(rcv), syllable.vowelTone)) {
basePhoneme = rcv;
} else if ((!HasOto(rcv, syllable.vowelTone) || !HasOto(ValidateAlias(rcv), syllable.vowelTone)) && (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone))) {
} else if ((!HasOto(rcv, syllable.vowelTone) && !HasOto(ValidateAlias(rcv), syllable.vowelTone)) && (HasOto(crv, syllable.vowelTone) || HasOto(ValidateAlias(crv), syllable.vowelTone))) {
basePhoneme = crv;
TryAddPhoneme(phonemes, syllable.tone, $"- {cc[0]}", ValidateAlias($"- {cc[0]}"));
} else {
Expand Down Expand Up @@ -299,7 +299,7 @@ protected override List<string> ProcessSyllable(Syllable syllable) {
lastC = i;
basePhoneme = ccv;
break;
} else if ((HasOto(rccv, syllable.vowelTone) || HasOto(ValidateAlias(rccv), syllable.vowelTone)) && (!HasOto(ccv, syllable.vowelTone) || !HasOto(ValidateAlias(ccv), syllable.vowelTone))) {
} else if ((HasOto(rccv, syllable.vowelTone) || HasOto(ValidateAlias(rccv), syllable.vowelTone)) && (!HasOto(ccv, syllable.vowelTone) && !HasOto(ValidateAlias(ccv), syllable.vowelTone))) {
lastC = i;
basePhoneme = rccv;
break;
Expand Down Expand Up @@ -432,7 +432,7 @@ protected override List<string> ProcessEnding(Ending ending) {
var vcr2 = $"{v}{cc[0]} -";
if (HasOto(vcr, ending.tone) || HasOto(ValidateAlias(vcr), ending.tone)) {
phonemes.Add(vcr);
} else if ((!HasOto(vcr, ending.tone) || !HasOto(ValidateAlias(vcr), ending.tone)) && (HasOto(vcr2, ending.tone) || HasOto(ValidateAlias(vcr2), ending.tone))) {
} else if ((!HasOto(vcr, ending.tone) && !HasOto(ValidateAlias(vcr), ending.tone)) && (HasOto(vcr2, ending.tone) || HasOto(ValidateAlias(vcr2), ending.tone))) {
phonemes.Add(vcr2);
} else {
phonemes.Add(vc);
Expand Down

0 comments on commit 2565a23

Please sign in to comment.