Introduce a `text_analyzer` module in voicevox_core.

- lib.rs: export the new `text_analyzer` module.
- synthesizer.rs: `Synthesizer` now holds an `OpenJTalkAnalyzer` and a
  `KanaAnalyzer` and calls their `analyze` methods instead of calling
  `parse_kana` / `Utterance::extract_full_context_label` directly; the inline
  accent-phrase construction and the private `mora_to_text` helper are removed.
- text_analyzer.rs (new): `TextAnalyzer` trait, `KanaAnalyzer`,
  `OpenJTalkAnalyzer`, `utterance_to_accent_phrases` and `mora_to_text`.

diff --git a/crates/voicevox_core/src/lib.rs b/crates/voicevox_core/src/lib.rs
index ea74c9f7c..f0a948354 100644
--- a/crates/voicevox_core/src/lib.rs
+++ b/crates/voicevox_core/src/lib.rs
@@ -18,6 +18,7 @@ mod voice_model;
 
 pub mod __internal;
 pub mod blocking;
+pub mod text_analyzer;
 pub mod tokio;
 
 #[cfg(test)]
diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs
index 202e917c7..1ee16ace0 100644
--- a/crates/voicevox_core/src/synthesizer.rs
+++ b/crates/voicevox_core/src/synthesizer.rs
@@ -80,7 +80,7 @@ pub(crate) mod blocking {
     use enum_map::enum_map;
 
     use crate::{
-        engine::{self, create_kana, parse_kana, MoraModel, OjtPhoneme, Utterance},
+        engine::{create_kana, MoraModel, OjtPhoneme},
         error::ErrorRepr,
         infer::{
             domain::{
@@ -92,6 +92,7 @@ pub(crate) mod blocking {
             InferenceSessionOptions,
         },
         numerics::F32Ext as _,
+        text_analyzer::{mora_to_text, KanaAnalyzer, OpenJTalkAnalyzer, TextAnalyzer},
         AccentPhraseModel, AudioQueryModel, FullcontextExtractor, Result, StyleId,
         SupportedDevices, SynthesisOptions, VoiceModelId, VoiceModelMeta,
     };
@@ -103,7 +104,8 @@ pub(crate) mod blocking {
     /// 音声シンセサイザ。
     pub struct Synthesizer<O> {
         pub(super) status: Status,
-        open_jtalk: O,
+        open_jtalk_analyzer: OpenJTalkAnalyzer<O>,
+        kana_analyzer: KanaAnalyzer,
         use_gpu: bool,
     }
 
@@ -176,7 +178,8 @@ pub(crate) mod blocking {
 
             return Ok(Self {
                 status,
-                open_jtalk,
+                open_jtalk_analyzer: OpenJTalkAnalyzer::new(open_jtalk),
+                kana_analyzer: KanaAnalyzer,
                 use_gpu,
             });
 
@@ -457,7 +460,8 @@ pub(crate) mod blocking {
             kana: &str,
             style_id: StyleId,
         ) -> Result<Vec<AccentPhraseModel>> {
-            self.replace_mora_data(&parse_kana(kana)?, style_id)
+            let accent_phrases = self.kana_analyzer.analyze(kana)?;
+            self.replace_mora_data(&accent_phrases, style_id)
         }
 
         /// AccentPhraseの配列の音高・音素長を、特定の声で生成しなおす。
@@ -743,75 +747,7 @@ pub(crate) mod blocking {
             text: &str,
             style_id: StyleId,
         ) -> Result<Vec<AccentPhraseModel>> {
-            if text.is_empty() {
-                return Ok(Vec::new());
-            }
-
-            let utterance = Utterance::extract_full_context_label(&self.open_jtalk, text)?;
-
-            let accent_phrases: Vec<AccentPhraseModel> = utterance
-                .breath_groups()
-                .iter()
-                .enumerate()
-                .fold(Vec::new(), |mut accum_vec, (i, breath_group)| {
-                    accum_vec.extend(breath_group.accent_phrases().iter().enumerate().map(
-                        |(j, accent_phrase)| {
-                            let moras = accent_phrase
-                                .moras()
-                                .iter()
-                                .map(|mora| {
-                                    let mora_text = mora
-                                        .phonemes()
-                                        .iter()
-                                        .map(|phoneme| phoneme.phoneme().to_string())
-                                        .collect::<Vec<_>>()
-                                        .join("");
-
-                                    let (consonant, consonant_length) =
-                                        if let Some(consonant) = mora.consonant() {
-                                            (Some(consonant.phoneme().to_string()), Some(0.))
-                                        } else {
-                                            (None, None)
-                                        };
-
-                                    MoraModel::new(
-                                        mora_to_text(mora_text),
-                                        consonant,
-                                        consonant_length,
-                                        mora.vowel().phoneme().into(),
-                                        0.,
-                                        0.,
-                                    )
-                                })
-                                .collect();
-
-                            let pause_mora = if i != utterance.breath_groups().len() - 1
-                                && j == breath_group.accent_phrases().len() - 1
-                            {
-                                Some(MoraModel::new(
-                                    "、".into(),
-                                    None,
-                                    None,
-                                    "pau".into(),
-                                    0.,
-                                    0.,
-                                ))
-                            } else {
-                                None
-                            };
-
-                            AccentPhraseModel::new(
-                                moras,
-                                *accent_phrase.accent(),
-                                pause_mora,
-                                *accent_phrase.is_interrogative(),
-                            )
-                        },
-                    ));
-
-                    accum_vec
-                });
-
+            let accent_phrases = self.open_jtalk_analyzer.analyze(text)?;
             self.replace_mora_data(&accent_phrases, style_id)
         }
 
@@ -1175,21 +1111,6 @@ pub(crate) mod blocking {
         (consonant_phoneme_list, vowel_phoneme_list, vowel_indexes)
     }
 
-    fn mora_to_text(mora: impl AsRef<str>) -> String {
-        let last_char = mora.as_ref().chars().last().unwrap();
-        let mora = if ['A', 'I', 'U', 'E', 'O'].contains(&last_char) {
-            format!(
-                "{}{}",
-                &mora.as_ref()[0..mora.as_ref().len() - 1],
-                last_char.to_lowercase()
-            )
-        } else {
-            mora.as_ref().to_string()
-        };
-        // もしカタカナに変換できなければ、引数で与えた文字列がそのまま返ってくる
-        engine::mora2text(&mora).to_string()
-    }
-
     impl AudioQueryModel {
         fn from_accent_phrases(accent_phrases: Vec<AccentPhraseModel>) -> Self {
             let kana = create_kana(&accent_phrases);
diff --git a/crates/voicevox_core/src/text_analyzer.rs b/crates/voicevox_core/src/text_analyzer.rs
new file mode 100644
index 000000000..5ecb89d56
--- /dev/null
+++ b/crates/voicevox_core/src/text_analyzer.rs
@@ -0,0 +1,122 @@
+use crate::{
+    engine::{self, parse_kana, MoraModel, Utterance},
+    AccentPhraseModel, FullcontextExtractor, Result,
+};
+
+pub trait TextAnalyzer {
+    fn analyze(&self, text: &str) -> Result<Vec<AccentPhraseModel>>;
+}
+
+/// AquesTalk風記法からAccentPhraseの配列を生成するTextAnalyzer
+#[derive(Clone)]
+pub struct KanaAnalyzer;
+
+impl TextAnalyzer for KanaAnalyzer {
+    fn analyze(&self, text: &str) -> Result<Vec<AccentPhraseModel>> {
+        if text.is_empty() {
+            return Ok(Vec::new());
+        }
+        Ok(parse_kana(text)?)
+    }
+}
+
+/// OpenJtalkからAccentPhraseの配列を生成するTextAnalyzer
+#[derive(Clone)]
+pub struct OpenJTalkAnalyzer<O>(O);
+
+impl<O> OpenJTalkAnalyzer<O> {
+    pub fn new(open_jtalk: O) -> Self {
+        Self(open_jtalk)
+    }
+}
+
+impl<O: FullcontextExtractor> TextAnalyzer for OpenJTalkAnalyzer<O> {
+    fn analyze(&self, text: &str) -> Result<Vec<AccentPhraseModel>> {
+        if text.is_empty() {
+            return Ok(Vec::new());
+        }
+        let utterance = Utterance::extract_full_context_label(&self.0, text)?;
+        Ok(utterance_to_accent_phrases(utterance))
+    }
+}
+
+fn utterance_to_accent_phrases(utterance: Utterance) -> Vec<AccentPhraseModel> {
+    let accent_phrases: Vec<AccentPhraseModel> = utterance.breath_groups().iter().enumerate().fold(
+        Vec::new(),
+        |mut accum_vec, (i, breath_group)| {
+            accum_vec.extend(breath_group.accent_phrases().iter().enumerate().map(
+                |(j, accent_phrase)| {
+                    let moras = accent_phrase
+                        .moras()
+                        .iter()
+                        .map(|mora| {
+                            let mora_text = mora
+                                .phonemes()
+                                .iter()
+                                .map(|phoneme| phoneme.phoneme().to_string())
+                                .collect::<Vec<_>>()
+                                .join("");
+
+                            let (consonant, consonant_length) =
+                                if let Some(consonant) = mora.consonant() {
+                                    (Some(consonant.phoneme().to_string()), Some(0.))
+                                } else {
+                                    (None, None)
+                                };
+
+                            MoraModel::new(
+                                mora_to_text(mora_text),
+                                consonant,
+                                consonant_length,
+                                mora.vowel().phoneme().into(),
+                                0.,
+                                0.,
+                            )
+                        })
+                        .collect();
+
+                    let pause_mora = if i != utterance.breath_groups().len() - 1
+                        && j == breath_group.accent_phrases().len() - 1
+                    {
+                        Some(MoraModel::new(
+                            "、".into(),
+                            None,
+                            None,
+                            "pau".into(),
+                            0.,
+                            0.,
+                        ))
+                    } else {
+                        None
+                    };
+
+                    AccentPhraseModel::new(
+                        moras,
+                        *accent_phrase.accent(),
+                        pause_mora,
+                        *accent_phrase.is_interrogative(),
+                    )
+                },
+            ));
+
+            accum_vec
+        },
+    );
+
+    accent_phrases
+}
+
+pub fn mora_to_text(mora: impl AsRef<str>) -> String {
+    let last_char = mora.as_ref().chars().last().unwrap();
+    let mora = if ['A', 'I', 'U', 'E', 'O'].contains(&last_char) {
+        format!(
+            "{}{}",
+            &mora.as_ref()[0..mora.as_ref().len() - 1],
+            last_char.to_lowercase()
+        )
+    } else {
+        mora.as_ref().to_string()
+    };
+    // もしカタカナに変換できなければ、引数で与えた文字列がそのまま返ってくる
+    engine::mora2text(&mora).to_string()
+}