Skip to content

Commit

Permalink
TextAnalyzer traitにstring->AccentPhraseModel[]を移動 (#740)
Browse files Browse the repository at this point in the history
* TextAnalyzer traitにstring->AccentPhraseModel[]を移動

* refactor

* refactor

* Synthesizer<TextAnalyzer>の形で呼び出す時のために

* TextAnalyzerにClone追加

* Update crates/voicevox_core/src/text_analyzer.rs

Co-authored-by: Ryo Yamashita <[email protected]>

* remove KanaAnalyzer::new()

* mora_to_textを統合,細かいfix

---------

Co-authored-by: Ryo Yamashita <[email protected]>
  • Loading branch information
eyr1n and qryxip authored Feb 17, 2024
1 parent 9d1151f commit 5d13857
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 88 deletions.
1 change: 1 addition & 0 deletions crates/voicevox_core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ mod voice_model;

pub mod __internal;
pub mod blocking;
pub mod text_analyzer;
pub mod tokio;

#[cfg(test)]
Expand Down
97 changes: 9 additions & 88 deletions crates/voicevox_core/src/synthesizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ pub(crate) mod blocking {
use enum_map::enum_map;

use crate::{
engine::{self, create_kana, parse_kana, MoraModel, OjtPhoneme, Utterance},
engine::{create_kana, MoraModel, OjtPhoneme},
error::ErrorRepr,
infer::{
domain::{
Expand All @@ -92,6 +92,7 @@ pub(crate) mod blocking {
InferenceSessionOptions,
},
numerics::F32Ext as _,
text_analyzer::{mora_to_text, KanaAnalyzer, OpenJTalkAnalyzer, TextAnalyzer},
AccentPhraseModel, AudioQueryModel, FullcontextExtractor, Result, StyleId,
SupportedDevices, SynthesisOptions, VoiceModelId, VoiceModelMeta,
};
Expand All @@ -103,7 +104,8 @@ pub(crate) mod blocking {
/// 音声シンセサイザ。
pub struct Synthesizer<O> {
pub(super) status: Status<InferenceRuntimeImpl, InferenceDomainImpl>,
open_jtalk: O,
open_jtalk_analyzer: OpenJTalkAnalyzer<O>,
kana_analyzer: KanaAnalyzer,
use_gpu: bool,
}

Expand Down Expand Up @@ -176,7 +178,8 @@ pub(crate) mod blocking {

return Ok(Self {
status,
open_jtalk,
open_jtalk_analyzer: OpenJTalkAnalyzer::new(open_jtalk),
kana_analyzer: KanaAnalyzer,
use_gpu,
});

Expand Down Expand Up @@ -457,7 +460,8 @@ pub(crate) mod blocking {
kana: &str,
style_id: StyleId,
) -> Result<Vec<AccentPhraseModel>> {
self.replace_mora_data(&parse_kana(kana)?, style_id)
let accent_phrases = self.kana_analyzer.analyze(kana)?;
self.replace_mora_data(&accent_phrases, style_id)
}

/// AccentPhraseの配列の音高・音素長を、特定の声で生成しなおす。
Expand Down Expand Up @@ -743,75 +747,7 @@ pub(crate) mod blocking {
text: &str,
style_id: StyleId,
) -> Result<Vec<AccentPhraseModel>> {
if text.is_empty() {
return Ok(Vec::new());
}

let utterance = Utterance::extract_full_context_label(&self.open_jtalk, text)?;

let accent_phrases: Vec<AccentPhraseModel> = utterance
.breath_groups()
.iter()
.enumerate()
.fold(Vec::new(), |mut accum_vec, (i, breath_group)| {
accum_vec.extend(breath_group.accent_phrases().iter().enumerate().map(
|(j, accent_phrase)| {
let moras = accent_phrase
.moras()
.iter()
.map(|mora| {
let mora_text = mora
.phonemes()
.iter()
.map(|phoneme| phoneme.phoneme().to_string())
.collect::<Vec<_>>()
.join("");

let (consonant, consonant_length) =
if let Some(consonant) = mora.consonant() {
(Some(consonant.phoneme().to_string()), Some(0.))
} else {
(None, None)
};

MoraModel::new(
mora_to_text(mora_text),
consonant,
consonant_length,
mora.vowel().phoneme().into(),
0.,
0.,
)
})
.collect();

let pause_mora = if i != utterance.breath_groups().len() - 1
&& j == breath_group.accent_phrases().len() - 1
{
Some(MoraModel::new(
"、".into(),
None,
None,
"pau".into(),
0.,
0.,
))
} else {
None
};

AccentPhraseModel::new(
moras,
*accent_phrase.accent(),
pause_mora,
*accent_phrase.is_interrogative(),
)
},
));

accum_vec
});

let accent_phrases = self.open_jtalk_analyzer.analyze(text)?;
self.replace_mora_data(&accent_phrases, style_id)
}

Expand Down Expand Up @@ -1175,21 +1111,6 @@ pub(crate) mod blocking {
(consonant_phoneme_list, vowel_phoneme_list, vowel_indexes)
}

/// Converts a mora's concatenated phoneme string into its display text.
///
/// When the last character is one of the uppercase vowels `A`, `I`, `U`, `E`, `O`, that
/// character is lowercased before the string is handed to `engine::mora2text`.
///
/// An empty `mora` is forwarded to `engine::mora2text` unchanged instead of panicking.
fn mora_to_text(mora: impl AsRef<str>) -> String {
    let mora = mora.as_ref();
    let normalized = match mora.chars().last() {
        Some(last_char) if ['A', 'I', 'U', 'E', 'O'].contains(&last_char) => {
            // Cut exactly the trailing character off, whatever its encoded width.
            let stem = &mora[..mora.len() - last_char.len_utf8()];
            format!("{}{}", stem, last_char.to_lowercase())
        }
        // Covers both "no trailing uppercase vowel" and the empty string.
        _ => mora.to_string(),
    };
    // If it cannot be converted to katakana, the string given as the argument is returned as-is.
    engine::mora2text(&normalized).to_string()
}

impl AudioQueryModel {
fn from_accent_phrases(accent_phrases: Vec<AccentPhraseModel>) -> Self {
let kana = create_kana(&accent_phrases);
Expand Down
122 changes: 122 additions & 0 deletions crates/voicevox_core/src/text_analyzer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
use crate::{
engine::{self, parse_kana, MoraModel, Utterance},
AccentPhraseModel, FullcontextExtractor, Result,
};

/// Turns input text into a list of [`AccentPhraseModel`]s.
pub trait TextAnalyzer {
    /// Analyzes `text` and returns the resulting accent phrases, or an error on failure.
    fn analyze(&self, text: &str) -> Result<Vec<AccentPhraseModel>>;
}

/// A `TextAnalyzer` that generates an array of accent phrases from AquesTalk-style notation.
#[derive(Clone)]
pub struct KanaAnalyzer;

impl TextAnalyzer for KanaAnalyzer {
    /// Parses `text` with `parse_kana`.
    ///
    /// Empty input produces an empty list without calling the parser; parser errors are
    /// propagated to the caller.
    fn analyze(&self, text: &str) -> Result<Vec<AccentPhraseModel>> {
        match text {
            "" => Ok(Vec::new()),
            kana => Ok(parse_kana(kana)?),
        }
    }
}

/// A `TextAnalyzer` that generates an array of accent phrases via Open JTalk.
///
/// Thin wrapper around the extractor value `O`.
#[derive(Clone)]
pub struct OpenJTalkAnalyzer<O>(O);

impl<O> OpenJTalkAnalyzer<O> {
    /// Wraps `open_jtalk` in a new analyzer.
    pub fn new(open_jtalk: O) -> Self {
        OpenJTalkAnalyzer(open_jtalk)
    }
}

impl<O: FullcontextExtractor> TextAnalyzer for OpenJTalkAnalyzer<O> {
    /// Extracts full-context labels for `text` through the wrapped extractor and converts the
    /// resulting utterance into accent phrases.
    ///
    /// Empty input produces an empty list without touching the extractor; extraction errors
    /// are propagated to the caller.
    fn analyze(&self, text: &str) -> Result<Vec<AccentPhraseModel>> {
        if !text.is_empty() {
            let utterance = Utterance::extract_full_context_label(&self.0, text)?;
            Ok(utterance_to_accent_phrases(utterance))
        } else {
            Ok(Vec::new())
        }
    }
}

/// Flattens every breath group of `utterance` into a single list of [`AccentPhraseModel`]s.
///
/// Each mora's text is `mora_to_text` applied to the concatenation of its phonemes. The
/// consonant length (when a consonant exists) and the two trailing numeric arguments of
/// `MoraModel::new` are all passed as `0.`.
///
/// The final accent phrase of every breath group except the last one receives a pause mora
/// (text `"、"`, vowel `"pau"`).
fn utterance_to_accent_phrases(utterance: Utterance) -> Vec<AccentPhraseModel> {
    let breath_groups = utterance.breath_groups();
    let group_count = breath_groups.len();
    let mut collected = Vec::new();

    for (group_index, breath_group) in breath_groups.iter().enumerate() {
        let phrases = breath_group.accent_phrases();
        let phrase_count = phrases.len();
        // True when at least one more breath group follows this one.
        let followed_by_group = group_index + 1 != group_count;

        for (phrase_index, accent_phrase) in phrases.iter().enumerate() {
            let moras = accent_phrase
                .moras()
                .iter()
                .map(|mora| {
                    let phoneme_text: String = mora
                        .phonemes()
                        .iter()
                        .map(|phoneme| phoneme.phoneme().to_string())
                        .collect();

                    let (consonant, consonant_length) = match mora.consonant() {
                        Some(consonant) => (Some(consonant.phoneme().to_string()), Some(0.)),
                        None => (None, None),
                    };

                    MoraModel::new(
                        mora_to_text(phoneme_text),
                        consonant,
                        consonant_length,
                        mora.vowel().phoneme().into(),
                        0.,
                        0.,
                    )
                })
                .collect();

            // A pause is inserted only at the boundary between two breath groups.
            let closes_group = phrase_index + 1 == phrase_count;
            let pause_mora = (followed_by_group && closes_group)
                .then(|| MoraModel::new("、".into(), None, None, "pau".into(), 0., 0.));

            collected.push(AccentPhraseModel::new(
                moras,
                *accent_phrase.accent(),
                pause_mora,
                *accent_phrase.is_interrogative(),
            ));
        }
    }

    collected
}

/// Converts a mora's concatenated phoneme string into its display text.
///
/// When the last character is one of the uppercase vowels `A`, `I`, `U`, `E`, `O`, that
/// character is lowercased before the string is handed to `engine::mora2text`.
///
/// An empty `mora` is forwarded to `engine::mora2text` unchanged instead of panicking.
pub fn mora_to_text(mora: impl AsRef<str>) -> String {
    let mora = mora.as_ref();
    let normalized = match mora.chars().last() {
        Some(last_char) if ['A', 'I', 'U', 'E', 'O'].contains(&last_char) => {
            // Cut exactly the trailing character off, whatever its encoded width.
            let stem = &mora[..mora.len() - last_char.len_utf8()];
            format!("{}{}", stem, last_char.to_lowercase())
        }
        // Covers both "no trailing uppercase vowel" and the empty string.
        _ => mora.to_string(),
    };
    // If it cannot be converted to katakana, the string given as the argument is returned as-is.
    engine::mora2text(&normalized).to_string()
}

0 comments on commit 5d13857

Please sign in to comment.