From 70f3a6c4e08afca406efd95f5c9ede913fa21a6c Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Sun, 3 Dec 2023 04:05:48 +0900 Subject: [PATCH 1/4] =?UTF-8?q?Rust=E3=81=AE=E3=83=96=E3=83=AD=E3=83=83?= =?UTF-8?q?=E3=82=AD=E3=83=B3=E3=82=B0API=E3=82=92=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 61 ++++- Cargo.toml | 3 +- crates/voicevox_core/Cargo.toml | 3 + .../src/__internal/doctest_fixtures.rs | 10 +- crates/voicevox_core/src/blocking.rs | 6 + .../src/engine/full_context_label.rs | 22 +- crates/voicevox_core/src/engine/mod.rs | 6 +- crates/voicevox_core/src/engine/open_jtalk.rs | 136 +++++++--- crates/voicevox_core/src/infer/status.rs | 57 +++-- crates/voicevox_core/src/lib.rs | 8 +- crates/voicevox_core/src/synthesizer.rs | 176 +++++++------ crates/voicevox_core/src/test_util.rs | 8 +- crates/voicevox_core/src/tokio.rs | 6 + crates/voicevox_core/src/user_dict/dict.rs | 106 ++++++-- crates/voicevox_core/src/user_dict/mod.rs | 3 +- crates/voicevox_core/src/voice_model.rs | 236 ++++++++++++++---- crates/voicevox_core_c_api/src/c_impls.rs | 14 +- .../src/compatible_engine.rs | 16 +- crates/voicevox_core_c_api/src/lib.rs | 15 +- .../voicevox_core_java_api/src/open_jtalk.rs | 7 +- .../voicevox_core_java_api/src/synthesizer.rs | 37 +-- .../voicevox_core_java_api/src/user_dict.rs | 18 +- .../voicevox_core_java_api/src/voice_model.rs | 6 +- crates/voicevox_core_python_api/src/lib.rs | 14 +- 24 files changed, 673 insertions(+), 301 deletions(-) create mode 100644 crates/voicevox_core/src/blocking.rs create mode 100644 crates/voicevox_core/src/tokio.rs diff --git a/Cargo.lock b/Cargo.lock index 9b23148d3..deba74398 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -92,6 +92,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "aliasable" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -1641,9 +1647,9 @@ checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" [[package]] name = "heck" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" [[package]] name = "hermit-abi" @@ -1999,6 +2005,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.4" @@ -2316,7 +2331,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af5a8477ac96877b5bd1fd67e0c28736c12943aba24eda92b127e036b0c8f400" dependencies = [ "indexmap 1.9.1", - "itertools", + "itertools 0.10.5", "ndarray", "noisy_float", "num-integer", @@ -2552,6 +2567,31 @@ version = "6.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ff7415e9ae3fff1225851df9e0d9e4e5479f947619774677a63572e55e80eff" +[[package]] +name = "ouroboros" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c86de06555b970aec45229b27291b53154f21a5743a163419f4e4c0b065dcde" +dependencies = [ + "aliasable", + "ouroboros_macro", + "static_assertions", +] + +[[package]] +name = "ouroboros_macro" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cad0c4b129e9696e37cb712b243777b90ef489a0bfaa0ac34e7d9b860e4f134" +dependencies = [ + "heck", + "itertools 0.11.0", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.38", +] + [[package]] name = "output_vt100" version = "0.1.3" @@ -2755,7 +2795,7 @@ checksum = "59230a63c37f3e18569bdb90e4a89cbf5bf8b06fea0b84e65ea10cc4df47addd" dependencies = [ "concolor", "difflib", - "itertools", + "itertools 0.10.5", "predicates-core", "yansi", ] @@ -3610,6 +3650,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "stdweb" version = "0.4.20" @@ -4311,13 +4357,15 @@ dependencies = [ "heck", "humansize", "indexmap 2.0.0", - "itertools", + "itertools 0.10.5", "nanoid", "ndarray", "once_cell", "onnxruntime", "open_jtalk", + "ouroboros", "pretty_assertions", + "rayon", "regex", "rstest", "serde", @@ -4330,6 +4378,7 @@ dependencies = [ "uuid", "voicevox_core_macros", "windows", + "zip", ] [[package]] @@ -4349,7 +4398,7 @@ dependencies = [ "easy-ext", "futures", "inventory", - "itertools", + "itertools 0.10.5", "libc", "libloading", "libtest-mimic", diff --git a/Cargo.toml b/Cargo.toml index 4df9fd108..61dabd8c1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,7 +33,7 @@ fs-err = "2.9.0" futures = "0.3.26" futures-core = "0.3.25" futures-util = "0.3.25" -heck = "0.4.0" +heck = "0.4.1" humansize = "2.1.2" indexmap = "2.0.0" indicatif = "0.17.3" @@ -49,6 +49,7 @@ ndarray = "0.15.6" ndarray-stats = "0.5.1" octocrab = { version = "0.19.0", default-features = false } once_cell = "1.18.0" +ouroboros = "0.18.0" parse-display = "0.8.2" pretty_assertions = "1.3.0" proc-macro2 = "1.0.69" diff --git a/crates/voicevox_core/Cargo.toml b/crates/voicevox_core/Cargo.toml index 457d32fc3..1b8904875 100644 --- a/crates/voicevox_core/Cargo.toml +++ b/crates/voicevox_core/Cargo.toml @@ -28,6 +28,8 @@ ndarray.workspace = true once_cell.workspace = true onnxruntime.workspace = true open_jtalk.workspace = true +ouroboros.workspace = true +rayon.workspace = true regex.workspace = true serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true, features = ["preserve_order"] } @@ -37,6 +39,7 @@ tokio = { workspace = true, features = ["rt"] } tracing.workspace = true uuid = { workspace = true, features = ["v4", "serde"] } voicevox_core_macros = { path = "../voicevox_core_macros" } +zip.workspace = true [dev-dependencies] heck.workspace = true diff --git a/crates/voicevox_core/src/__internal/doctest_fixtures.rs b/crates/voicevox_core/src/__internal/doctest_fixtures.rs index c9029079c..a27478d98 100644 --- a/crates/voicevox_core/src/__internal/doctest_fixtures.rs +++ b/crates/voicevox_core/src/__internal/doctest_fixtures.rs @@ -1,19 +1,19 @@ use std::path::Path; -use crate::{AccelerationMode, InitializeOptions, OpenJtalk, Synthesizer, VoiceModel}; +use crate::{AccelerationMode, InitializeOptions}; pub async fn synthesizer_with_sample_voice_model( open_jtalk_dic_dir: impl AsRef, -) -> anyhow::Result> { - let syntesizer = Synthesizer::new( - OpenJtalk::new(open_jtalk_dic_dir).await?, +) -> anyhow::Result> { + let syntesizer = crate::tokio::Synthesizer::new( + crate::tokio::OpenJtalk::new(open_jtalk_dic_dir).await?, &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, ..Default::default() }, )?; - let model = &VoiceModel::from_path(concat!( + let model = &crate::tokio::VoiceModel::from_path(concat!( env!("CARGO_MANIFEST_DIR"), "/../../model/sample.vvm", )) diff --git a/crates/voicevox_core/src/blocking.rs b/crates/voicevox_core/src/blocking.rs new file mode 100644 index 000000000..aa600c598 --- /dev/null +++ b/crates/voicevox_core/src/blocking.rs @@ -0,0 +1,6 @@ +//! ブロッキング版API。 + +pub use crate::{ + engine::open_jtalk::blocking::OpenJtalk, synthesizer::blocking::Synthesizer, + user_dict::dict::blocking::UserDict, voice_model::blocking::VoiceModel, +}; diff --git a/crates/voicevox_core/src/engine/full_context_label.rs b/crates/voicevox_core/src/engine/full_context_label.rs index 667dbc8b1..978d85d6f 100644 --- a/crates/voicevox_core/src/engine/full_context_label.rs +++ b/crates/voicevox_core/src/engine/full_context_label.rs @@ -1,7 +1,8 @@ use std::collections::HashMap; -use super::*; -use crate::engine::open_jtalk::OpenjtalkFunctionError; +use crate::engine::open_jtalk::TextAnalyzer; +use derive_getters::Getters; +use derive_new::new; use once_cell::sync::Lazy; use regex::Regex; @@ -11,7 +12,7 @@ use regex::Regex; pub(crate) struct FullContextLabelError { context: ErrorKind, #[source] - source: Option, + source: Option, } #[derive(derive_more::Display, Debug)] @@ -316,16 +317,15 @@ impl Utterance { } pub(crate) fn extract_full_context_label( - open_jtalk: &open_jtalk::OpenJtalk, + open_jtalk: &impl TextAnalyzer, text: impl AsRef, ) -> Result { - let labels = - open_jtalk - .extract_fullcontext(text) - .map_err(|source| FullContextLabelError { - context: ErrorKind::OpenJtalk, - source: Some(source), - })?; + let labels = open_jtalk + .extract_fullcontext(text.as_ref()) + .map_err(|source| FullContextLabelError { + context: ErrorKind::OpenJtalk, + source: Some(source), + })?; labels .into_iter() diff --git a/crates/voicevox_core/src/engine/mod.rs b/crates/voicevox_core/src/engine/mod.rs index a0a073bbf..af117f37d 100644 --- a/crates/voicevox_core/src/engine/mod.rs +++ b/crates/voicevox_core/src/engine/mod.rs @@ -3,13 +3,11 @@ mod full_context_label; mod kana_parser; mod model; mod mora_list; -mod open_jtalk; - -use super::*; +pub(crate) mod open_jtalk; pub use self::acoustic_feature_extractor::*; pub use self::full_context_label::*; pub use self::kana_parser::*; pub use self::model::*; pub(crate) use self::mora_list::mora2text; -pub use self::open_jtalk::OpenJtalk; +pub use self::open_jtalk::TextAnalyzer; diff --git a/crates/voicevox_core/src/engine/open_jtalk.rs b/crates/voicevox_core/src/engine/open_jtalk.rs index 1cffe6757..6cdf1b3e8 100644 --- a/crates/voicevox_core/src/engine/open_jtalk.rs +++ b/crates/voicevox_core/src/engine/open_jtalk.rs @@ -7,7 +7,7 @@ use tempfile::NamedTempFile; use ::open_jtalk::*; -use crate::{error::ErrorRepr, UserDict}; +use crate::error::ErrorRepr; #[derive(thiserror::Error, Debug)] #[error("`{function}`の実行が失敗しました")] @@ -17,13 +17,6 @@ pub(crate) struct OpenjtalkFunctionError { source: Option, } -/// テキスト解析器としてのOpen JTalk。 -#[derive(Clone)] -pub struct OpenJtalk { - resources: Arc>, - dict_dir: Arc, // FIXME: `camino::Utf8PathBuf`にする -} - struct Resources { mecab: ManagedResource, njd: ManagedResource, @@ -33,16 +26,16 @@ struct Resources { #[allow(unsafe_code)] unsafe impl Send for Resources {} -impl OpenJtalk { - pub async fn new(open_jtalk_dict_dir: impl AsRef) -> crate::result::Result { +impl self::blocking::OpenJtalk { + pub fn new(open_jtalk_dict_dir: impl AsRef) -> crate::result::Result { let dict_dir = open_jtalk_dict_dir .as_ref() .to_str() .unwrap_or_else(|| todo!()) // FIXME: `camino::Utf8Path`を要求するようにする .to_owned(); - let dict_dir = Arc::new(dict_dir); - crate::task::asyncify(move || { + // FIXME: この`{}`はGitのdiffを抑えるためだけに存在 + { let mut resources = Resources { mecab: ManagedResource::initialize(), njd: ManagedResource::initialize(), @@ -55,24 +48,50 @@ impl OpenJtalk { return Err(ErrorRepr::NotLoadedOpenjtalkDict.into()); } - Ok(Self { - resources: Mutex::new(resources).into(), + Ok(Self(Arc::new(self::blocking::Inner { + resources: Mutex::new(resources), dict_dir, - }) - }) - .await + }))) + } } /// ユーザー辞書を設定する。 /// /// この関数を呼び出した後にユーザー辞書を変更した場合は、再度この関数を呼ぶ必要がある。 - pub async fn use_user_dict(&self, user_dict: &UserDict) -> crate::result::Result<()> { - let resources = self.resources.clone(); - let dict_dir = self.dict_dir.clone(); + pub fn use_user_dict( + &self, + user_dict: &crate::blocking::UserDict, + ) -> crate::result::Result<()> { + let words = &user_dict.to_mecab_format(); + self.0.use_user_dict(words) + } +} +impl self::tokio::OpenJtalk { + pub async fn new(open_jtalk_dict_dir: impl AsRef) -> crate::result::Result { + let open_jtalk_dict_dir = open_jtalk_dict_dir.as_ref().to_owned(); + let blocking = + crate::task::asyncify(|| self::blocking::OpenJtalk::new(open_jtalk_dict_dir)).await?; + Ok(Self(blocking)) + } + + /// ユーザー辞書を設定する。 + /// + /// この関数を呼び出した後にユーザー辞書を変更した場合は、再度この関数を呼ぶ必要がある。 + pub async fn use_user_dict( + &self, + user_dict: &crate::tokio::UserDict, + ) -> crate::result::Result<()> { + let inner = self.0 .0.clone(); let words = user_dict.to_mecab_format(); + crate::task::asyncify(move || inner.use_user_dict(&words)).await + } +} - let result = crate::task::asyncify(move || -> crate::Result<_> { +impl self::blocking::Inner { + // FIXME: 中断可能にする + fn use_user_dict(&self, words: &str) -> crate::result::Result<()> { + let result = { // ユーザー辞書用のcsvを作成 let mut temp_csv = NamedTempFile::new().map_err(|e| ErrorRepr::UseUserDict(e.into()))?; @@ -88,7 +107,7 @@ impl OpenJtalk { mecab_dict_index(&[ "mecab-dict-index", "-d", - &dict_dir, + &self.dict_dir, "-u", temp_dict_path.to_str().unwrap(), "-f", @@ -99,11 +118,10 @@ impl OpenJtalk { "-q", ]); - let Resources { mecab, .. } = &mut *resources.lock().unwrap(); + let Resources { mecab, .. } = &mut *self.resources.lock().unwrap(); - Ok(mecab.load_with_userdic((*dict_dir).as_ref(), Some(Path::new(&temp_dict_path)))) - }) - .await?; + mecab.load_with_userdic(self.dict_dir.as_ref(), Some(Path::new(&temp_dict_path))) + }; if !result { return Err(ErrorRepr::UseUserDict(anyhow!("辞書のコンパイルに失敗しました")).into()); @@ -111,22 +129,25 @@ impl OpenJtalk { Ok(()) } +} - pub(crate) fn extract_fullcontext( - &self, - text: impl AsRef, - ) -> std::result::Result, OpenjtalkFunctionError> { +pub trait TextAnalyzer: Clone + Send + Sync + 'static { + fn extract_fullcontext(&self, text: &str) -> anyhow::Result>; +} + +impl TextAnalyzer for self::blocking::OpenJtalk { + fn extract_fullcontext(&self, text: &str) -> anyhow::Result> { let Resources { mecab, njd, jpcommon, - } = &mut *self.resources.lock().unwrap(); + } = &mut *self.0.resources.lock().unwrap(); jpcommon.refresh(); njd.refresh(); mecab.refresh(); - let mecab_text = text2mecab(text.as_ref()).map_err(|e| OpenjtalkFunctionError { + let mecab_text = text2mecab(text).map_err(|e| OpenjtalkFunctionError { function: "text2mecab", source: Some(e), })?; @@ -153,21 +174,52 @@ impl OpenJtalk { source: None, }) .map(|iter| iter.map(|s| s.to_string()).collect()) + .map_err(Into::into) } else { Err(OpenjtalkFunctionError { function: "Mecab_analysis", source: None, - }) + } + .into()) } } } +impl TextAnalyzer for self::tokio::OpenJtalk { + fn extract_fullcontext(&self, text: &str) -> anyhow::Result> { + self.0.extract_fullcontext(text) + } +} + +pub(crate) mod blocking { + use std::sync::Arc; + + use super::Resources; + + /// テキスト解析器としてのOpen JTalk。 + #[derive(Clone)] + pub struct OpenJtalk(pub(super) Arc); + + pub(super) struct Inner { + pub(super) resources: std::sync::Mutex, + pub(super) dict_dir: String, // FIXME: `camino::Utf8PathBuf`にする + } +} + +pub(crate) mod tokio { + /// テキスト解析器としてのOpen JTalk。 + #[derive(Clone)] + pub struct OpenJtalk(pub(super) super::blocking::OpenJtalk); +} + #[cfg(test)] mod tests { - use super::*; use ::test_util::OPEN_JTALK_DIC_DIR; + use rstest::rstest; + + use crate::macros::tests::assert_debug_fmt_eq; - use crate::{macros::tests::assert_debug_fmt_eq, *}; + use super::{OpenjtalkFunctionError, TextAnalyzer as _}; fn testdata_hello_hiho() -> Vec { // こんにちは、ヒホです。の期待値 @@ -257,14 +309,16 @@ mod tests { } #[rstest] - #[case("", Err(OpenjtalkFunctionError { function: "Mecab_get_feature", source: None }))] + #[case("", Err(OpenjtalkFunctionError { function: "Mecab_get_feature", source: None }.into()))] #[case("こんにちは、ヒホです。", Ok(testdata_hello_hiho()))] #[tokio::test] async fn extract_fullcontext_works( #[case] text: &str, - #[case] expected: std::result::Result, OpenjtalkFunctionError>, + #[case] expected: anyhow::Result>, ) { - let open_jtalk = OpenJtalk::new(OPEN_JTALK_DIC_DIR).await.unwrap(); + let open_jtalk = super::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) + .await + .unwrap(); let result = open_jtalk.extract_fullcontext(text); assert_debug_fmt_eq!(expected, result); } @@ -274,9 +328,11 @@ mod tests { #[tokio::test] async fn extract_fullcontext_loop_works( #[case] text: &str, - #[case] expected: std::result::Result, OpenjtalkFunctionError>, + #[case] expected: anyhow::Result>, ) { - let open_jtalk = OpenJtalk::new(OPEN_JTALK_DIC_DIR).await.unwrap(); + let open_jtalk = super::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) + .await + .unwrap(); for _ in 0..10 { let result = open_jtalk.extract_fullcontext(text); assert_debug_fmt_eq!(expected, result); diff --git a/crates/voicevox_core/src/infer/status.rs b/crates/voicevox_core/src/infer/status.rs index 62805d37a..be0253e16 100644 --- a/crates/voicevox_core/src/infer/status.rs +++ b/crates/voicevox_core/src/infer/status.rs @@ -15,8 +15,8 @@ use crate::{ infer::{InferenceOperation, ParamInfo}, manifest::ModelInnerId, metas::{SpeakerMeta, StyleId, StyleMeta, VoiceModelMeta}, - voice_model::{VoiceModel, VoiceModelId}, - Result, + voice_model::VoiceModelId, + Result, VoiceModelHeader, }; use super::{ @@ -37,20 +37,20 @@ impl Status { } } - pub async fn load_model( + pub fn insert_model( &self, - model: &VoiceModel, + model_header: &VoiceModelHeader, model_bytes: &EnumMap>, ) -> Result<()> { self.loaded_models .lock() .unwrap() - .ensure_acceptable(model)?; + .ensure_acceptable(model_header)?; let session_set = SessionSet::new(model_bytes, &self.session_options).map_err(|source| { LoadModelError { - path: model.path().clone(), + path: model_header.path.clone(), context: LoadModelErrorKind::InvalidModelData, source: Some(source), } @@ -59,7 +59,7 @@ impl Status { self.loaded_models .lock() .unwrap() - .insert(model, session_set)?; + .insert(model_header, session_set)?; Ok(()) } @@ -180,24 +180,27 @@ impl LoadedModels { self.styles().any(|style| *style.id() == style_id) } - /// 与えられた`VoiceModel`を受け入れ可能かをチェックする。 + /// 音声モデルを受け入れ可能かをチェックする。 /// /// # Errors /// - /// 音声モデルIDかスタイルIDが`model`と重複するとき、エラーを返す。 - fn ensure_acceptable(&self, model: &VoiceModel) -> LoadModelResult<()> { + /// 音声モデルIDかスタイルIDが`model_header`と重複するとき、エラーを返す。 + fn ensure_acceptable(&self, model_header: &VoiceModelHeader) -> LoadModelResult<()> { let loaded = self.styles(); - let external = model.metas().iter().flat_map(|speaker| speaker.styles()); + let external = model_header + .metas + .iter() + .flat_map(|speaker| speaker.styles()); let error = |context| LoadModelError { - path: model.path().clone(), + path: model_header.path.clone(), context, source: None, }; - if self.0.contains_key(model.id()) { + if self.0.contains_key(&model_header.id) { return Err(error(LoadModelErrorKind::ModelAlreadyLoaded { - id: model.id().clone(), + id: model_header.id.clone(), })); } if let Some((style, _)) = @@ -210,14 +213,18 @@ impl LoadedModels { Ok(()) } - fn insert(&mut self, model: &VoiceModel, session_set: SessionSet) -> Result<()> { - self.ensure_acceptable(model)?; + fn insert( + &mut self, + model_header: &VoiceModelHeader, + session_set: SessionSet, + ) -> Result<()> { + self.ensure_acceptable(model_header)?; let prev = self.0.insert( - model.id().clone(), + model_header.id.clone(), LoadedModel { - model_inner_ids: model.model_inner_ids(), - metas: model.metas().clone(), + model_inner_ids: model_header.model_inner_ids(), + metas: model_header.metas.clone(), session_set, }, ); @@ -387,7 +394,7 @@ mod tests { ); let model = &open_default_vvm_file().await; let model_bytes = &model.read_inference_models().await.unwrap(); - let result = status.load_model(model, model_bytes).await; + let result = status.insert_model(model.header(), model_bytes); assert_debug_fmt_eq!(Ok(()), result); assert_eq!(1, status.loaded_models.lock().unwrap().0.len()); } @@ -399,13 +406,17 @@ mod tests { enum_map!(_ => InferenceSessionOptions::new(0, false)), ); let vvm = open_default_vvm_file().await; + let model_header = vvm.header(); let model_bytes = &vvm.read_inference_models().await.unwrap(); assert!( - !status.is_loaded_model(vvm.id()), + !status.is_loaded_model(&model_header.id), "model should not be loaded" ); - let result = status.load_model(&vvm, model_bytes).await; + let result = status.insert_model(model_header, model_bytes); assert_debug_fmt_eq!(Ok(()), result); - assert!(status.is_loaded_model(vvm.id()), "model should be loaded"); + assert!( + status.is_loaded_model(&model_header.id), + "model should be loaded", + ); } } diff --git a/crates/voicevox_core/src/lib.rs b/crates/voicevox_core/src/lib.rs index 78552a9f8..693d34b35 100644 --- a/crates/voicevox_core/src/lib.rs +++ b/crates/voicevox_core/src/lib.rs @@ -17,6 +17,8 @@ mod version; mod voice_model; pub mod __internal; +pub mod blocking; +pub mod tokio; #[cfg(test)] mod test_util; @@ -24,16 +26,14 @@ mod test_util; #[cfg(test)] use self::test_util::*; -pub use self::engine::{AccentPhraseModel, AudioQueryModel, OpenJtalk}; +pub use self::engine::{AccentPhraseModel, AudioQueryModel, TextAnalyzer}; pub use self::error::*; pub use self::metas::*; pub use self::result::*; pub use self::voice_model::*; pub use devices::*; pub use manifest::*; -pub use synthesizer::{ - AccelerationMode, InitializeOptions, SynthesisOptions, Synthesizer, TtsOptions, -}; +pub use synthesizer::{AccelerationMode, InitializeOptions, SynthesisOptions, TtsOptions}; pub use user_dict::*; pub use version::*; diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs index 556877e08..e565f2599 100644 --- a/crates/voicevox_core/src/synthesizer.rs +++ b/crates/voicevox_core/src/synthesizer.rs @@ -1,13 +1,10 @@ -use std::{ - io::{Cursor, Write as _}, - sync::Arc, -}; +use std::io::{Cursor, Write as _}; use enum_map::enum_map; use crate::{ engine::{ - create_kana, parse_kana, AccentPhraseModel, MoraModel, OjtPhoneme, OpenJtalk, Utterance, + create_kana, parse_kana, AccentPhraseModel, MoraModel, OjtPhoneme, TextAnalyzer, Utterance, }, infer::{ domain::{ @@ -23,9 +20,10 @@ use crate::{ use super::*; -/// [`Synthesizer::synthesis`]のオプション。 +/// [`blocking::Synthesizer::synthesis`]および[`tokio::Synthesizer::synthesis`]のオプション。 /// -/// [`Synthesizer::synthesis`]: Synthesizer::synthesis +/// [`blocking::Synthesizer::synthesis`]: blocking::Synthesizer::synthesis +/// [`tokio::Synthesizer::synthesis`]: tokio::Synthesizer::synthesis #[derive(Clone)] pub struct SynthesisOptions { pub enable_interrogative_upspeak: bool, @@ -45,9 +43,10 @@ impl From<&TtsOptions> for SynthesisOptions { } } -/// [`Synthesizer::tts`]のオプション。 +/// [`blocking::Synthesizer::tts`]および[`tokio::Synthesizer::tts`]のオプション。 /// -/// [`Synthesizer::tts`]: Synthesizer::tts +/// [`blocking::Synthesizer::tts`]: blocking::Synthesizer::tts +/// [`tokio::Synthesizer::tts`]: tokio::Synthesizer::tts #[derive(Clone)] pub struct TtsOptions { pub enable_interrogative_upspeak: bool, @@ -79,9 +78,10 @@ pub enum AccelerationMode { Gpu, } -/// [`Synthesizer::new`]のオプション。 +/// [`blocking::Synthesizer::new`]および[`tokio::Synthesizer::new`]のオプション。 /// -/// [`Synthesizer::new`]: Synthesizer::new +/// [`blocking::Synthesizer::new`]: blocking::Synthesizer::new +/// [`tokio::Synthesizer::new`]: tokio::Synthesizer::new #[derive(Default)] pub struct InitializeOptions { pub acceleration_mode: AccelerationMode, @@ -92,14 +92,10 @@ const DEFAULT_SAMPLING_RATE: u32 = 24000; pub(crate) type InferenceRuntimeImpl = Onnxruntime; -/// 音声シンセサイザ。 -#[derive(Clone)] -pub struct Synthesizer(Arc>); - // FIXME: docを書く -impl Synthesizer { +impl self::tokio::Synthesizer { pub fn new(open_jtalk: O, options: &InitializeOptions) -> Result { - blocking::Synthesizer::new(open_jtalk, options) + self::blocking::Synthesizer::new(open_jtalk, options) .map(Into::into) .map(Self) } @@ -108,8 +104,9 @@ impl Synthesizer { self.0.is_gpu_mode() } - pub async fn load_voice_model(&self, model: &VoiceModel) -> Result<()> { - self.0.load_voice_model(model).await + pub async fn load_voice_model(&self, model: &crate::tokio::VoiceModel) -> Result<()> { + let model_bytes = &model.read_inference_models().await?; + self.0.status.insert_model(model.header(), model_bytes) } pub fn unload_voice_model(&self, voice_model_id: &VoiceModelId) -> Result<()> { @@ -213,7 +210,7 @@ impl Synthesizer { } } -impl Synthesizer { +impl self::tokio::Synthesizer { pub async fn create_accent_phrases( &self, text: &str, @@ -251,7 +248,7 @@ impl Synthesizer { // FIXME: ここのdocのコードブロックはasync版のものなので、↑の方に移した上で、(ブロッキング版を // public APIにするならの話ではあるが)ブロッキング版はブロッキング版でコード例を用意する -impl blocking::Synthesizer { +impl self::blocking::Synthesizer { /// `Synthesizer`をコンストラクトする。 /// /// # Example @@ -266,7 +263,10 @@ impl blocking::Synthesizer { /// # /// use std::sync::Arc; /// - /// use voicevox_core::{AccelerationMode, InitializeOptions, OpenJtalk, Synthesizer}; + /// use voicevox_core::{ + /// tokio::{OpenJtalk, Synthesizer}, + /// AccelerationMode, InitializeOptions, + /// }; /// /// let mut syntesizer = Synthesizer::new( /// Arc::new(OpenJtalk::new(OPEN_JTALK_DIC_DIR).await.unwrap()), @@ -279,7 +279,7 @@ impl blocking::Synthesizer { /// # Ok(()) /// # } /// ``` - fn new(open_jtalk: O, options: &InitializeOptions) -> Result { + pub fn new(open_jtalk: O, options: &InitializeOptions) -> Result { #[cfg(windows)] list_windows_video_cards(); @@ -336,24 +336,23 @@ impl blocking::Synthesizer { } /// ハードウェアアクセラレーションがGPUモードか判定する。 - fn is_gpu_mode(&self) -> bool { + pub fn is_gpu_mode(&self) -> bool { self.use_gpu } - // FIXME: ブロッキング版を作る /// 音声モデルを読み込む。 - async fn load_voice_model(&self, model: &VoiceModel) -> Result<()> { - let model_bytes = &model.read_inference_models().await?; - self.status.load_model(model, model_bytes).await + pub fn load_voice_model(&self, model: &crate::blocking::VoiceModel) -> Result<()> { + let model_bytes = &model.read_inference_models()?; + self.status.insert_model(model.header(), model_bytes) } /// 音声モデルの読み込みを解除する。 - fn unload_voice_model(&self, voice_model_id: &VoiceModelId) -> Result<()> { + pub fn unload_voice_model(&self, voice_model_id: &VoiceModelId) -> Result<()> { self.status.unload_model(voice_model_id) } /// 指定したIDの音声モデルが読み込まれているか判定する。 - fn is_loaded_voice_model(&self, voice_model_id: &VoiceModelId) -> bool { + pub fn is_loaded_voice_model(&self, voice_model_id: &VoiceModelId) -> bool { self.status.is_loaded_model(voice_model_id) } @@ -362,12 +361,12 @@ impl blocking::Synthesizer { } /// 今読み込んでいる音声モデルのメタ情報を返す。 - fn metas(&self) -> VoiceModelMeta { + pub fn metas(&self) -> VoiceModelMeta { self.status.metas() } /// AudioQueryから音声合成を行う。 - fn synthesis( + pub fn synthesis( &self, audio_query: &AudioQueryModel, style_id: StyleId, @@ -596,7 +595,7 @@ impl blocking::Synthesizer { /// # Ok(()) /// # } /// ``` - fn create_accent_phrases_from_kana( + pub fn create_accent_phrases_from_kana( &self, kana: &str, style_id: StyleId, @@ -605,7 +604,7 @@ impl blocking::Synthesizer { } } -impl blocking::Synthesizer { +impl self::blocking::Synthesizer { /// 日本語のテキストからAccentPhrase (アクセント句)の配列を生成する。 /// /// # Example @@ -629,7 +628,7 @@ impl blocking::Synthesizer { /// # Ok(()) /// # } /// ``` - fn create_accent_phrases( + pub fn create_accent_phrases( &self, text: &str, style_id: StyleId, @@ -707,9 +706,9 @@ impl blocking::Synthesizer { } } -impl blocking::Synthesizer { +impl self::blocking::Synthesizer { /// AccentPhraseの配列の音高・音素長を、特定の声で生成しなおす。 - fn replace_mora_data( + pub fn replace_mora_data( &self, accent_phrases: &[AccentPhraseModel], style_id: StyleId, @@ -719,7 +718,7 @@ impl blocking::Synthesizer { } /// AccentPhraseの配列の音素長を、特定の声で生成しなおす。 - fn replace_phoneme_length( + pub fn replace_phoneme_length( &self, accent_phrases: &[AccentPhraseModel], style_id: StyleId, @@ -779,7 +778,7 @@ impl blocking::Synthesizer { } /// AccentPhraseの配列の音高を、特定の声で生成しなおす。 - fn replace_mora_pitch( + pub fn replace_mora_pitch( &self, accent_phrases: &[AccentPhraseModel], style_id: StyleId, @@ -939,13 +938,13 @@ impl blocking::Synthesizer { /// ``` /// /// [AudioQuery]: crate::AudioQueryModel - fn audio_query_from_kana(&self, kana: &str, style_id: StyleId) -> Result { + pub fn audio_query_from_kana(&self, kana: &str, style_id: StyleId) -> Result { let accent_phrases = self.create_accent_phrases_from_kana(kana, style_id)?; Ok(AudioQueryModel::from_accent_phrases(accent_phrases).with_kana(Some(kana.to_owned()))) } } -impl blocking::Synthesizer { +impl self::blocking::Synthesizer { /// 日本語のテキストから[AudioQuery]を生成する。 /// /// # Examples @@ -971,15 +970,15 @@ impl blocking::Synthesizer { /// ``` /// /// [AudioQuery]: crate::AudioQueryModel - fn audio_query(&self, text: &str, style_id: StyleId) -> Result { + pub fn audio_query(&self, text: &str, style_id: StyleId) -> Result { let accent_phrases = self.create_accent_phrases(text, style_id)?; Ok(AudioQueryModel::from_accent_phrases(accent_phrases)) } } -impl blocking::Synthesizer { +impl self::blocking::Synthesizer { /// AquesTalk風記法から音声合成を行う。 - fn tts_from_kana( + pub fn tts_from_kana( &self, kana: &str, style_id: StyleId, @@ -990,9 +989,9 @@ impl blocking::Synthesizer { } } -impl blocking::Synthesizer { +impl self::blocking::Synthesizer { /// 日本語のテキストから音声合成を行う。 - fn tts(&self, text: &str, style_id: StyleId, options: &TtsOptions) -> Result> { + pub fn tts(&self, text: &str, style_id: StyleId, options: &TtsOptions) -> Result> { let audio_query = &self.audio_query(text, style_id)?; self.synthesis(audio_query, style_id, &SynthesisOptions::from(options)) } @@ -1039,7 +1038,7 @@ pub trait PerformInference { ) -> Result>; } -impl PerformInference for Synthesizer { +impl PerformInference for self::tokio::Synthesizer { fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result> { self.0.predict_duration(phoneme_vector, style_id) } @@ -1080,7 +1079,7 @@ impl PerformInference for Synthesizer { } } -impl PerformInference for blocking::Synthesizer { +impl PerformInference for self::blocking::Synthesizer { fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result> { // FIXME: `Status::ids_for`があるため、ここは不要なはず if !self.status.validate_speaker_id(style_id) { @@ -1393,30 +1392,43 @@ impl AudioQueryModel { } } -mod blocking { +pub(crate) mod blocking { use crate::infer::{domain::InferenceDomainImpl, status::Status}; use super::InferenceRuntimeImpl; - pub(super) struct Synthesizer { + /// 音声シンセサイザ。 + pub struct Synthesizer { pub(super) status: Status, pub(super) open_jtalk: O, pub(super) use_gpu: bool, } } +pub(crate) mod tokio { + use std::sync::Arc; + + /// 音声シンセサイザ。 + #[derive(Clone)] + pub struct Synthesizer(pub(super) Arc>); +} + #[cfg(test)] mod tests { - use super::*; - use crate::{engine::MoraModel, macros::tests::assert_debug_fmt_eq}; + use super::{AccelerationMode, InitializeOptions, PerformInference as _}; + use crate::{ + engine::MoraModel, macros::tests::assert_debug_fmt_eq, open_default_vvm_file, + AccentPhraseModel, Result, StyleId, + }; use ::test_util::OPEN_JTALK_DIC_DIR; + use rstest::rstest; #[rstest] #[case(Ok(()))] #[tokio::test] async fn load_model_works(#[case] expected_result_at_initialized: Result<()>) { - let syntesizer = Synthesizer::new( + let syntesizer = super::tokio::Synthesizer::new( (), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, @@ -1439,7 +1451,7 @@ mod tests { #[rstest] #[tokio::test] async fn is_use_gpu_works() { - let syntesizer = Synthesizer::new( + let syntesizer = super::tokio::Synthesizer::new( (), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, @@ -1455,7 +1467,7 @@ mod tests { #[tokio::test] async fn is_loaded_model_by_style_id_works(#[case] style_id: u32, #[case] expected: bool) { let style_id = StyleId::new(style_id); - let syntesizer = Synthesizer::new( + let syntesizer = super::tokio::Synthesizer::new( (), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, @@ -1483,7 +1495,7 @@ mod tests { #[rstest] #[tokio::test] async fn predict_duration_works() { - let syntesizer = Synthesizer::new( + let syntesizer = super::tokio::Synthesizer::new( (), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, @@ -1512,7 +1524,7 @@ mod tests { #[rstest] #[tokio::test] async fn predict_intonation_works() { - let syntesizer = Synthesizer::new( + let syntesizer = super::tokio::Synthesizer::new( (), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, @@ -1551,7 +1563,7 @@ mod tests { #[rstest] #[tokio::test] async fn decode_works() { - let syntesizer = Synthesizer::new( + let syntesizer = super::tokio::Synthesizer::new( (), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, @@ -1641,8 +1653,10 @@ mod tests { #[case] expected_text_consonant_vowel_data: &TextConsonantVowelData, #[case] expected_kana_text: &str, ) { - let syntesizer = Synthesizer::new( - OpenJtalk::new(OPEN_JTALK_DIC_DIR).await.unwrap(), + let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) + .await + .unwrap(), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, ..Default::default() @@ -1650,7 +1664,7 @@ mod tests { ) .unwrap(); - let model = &VoiceModel::sample().await.unwrap(); + let model = &crate::tokio::VoiceModel::sample().await.unwrap(); syntesizer.load_voice_model(model).await.unwrap(); let query = match input { @@ -1709,8 +1723,10 @@ mod tests { #[case] input: Input, #[case] expected_text_consonant_vowel_data: &TextConsonantVowelData, ) { - let syntesizer = Synthesizer::new( - OpenJtalk::new(OPEN_JTALK_DIC_DIR).await.unwrap(), + let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) + .await + .unwrap(), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, ..Default::default() @@ -1718,7 +1734,7 @@ mod tests { ) .unwrap(); - let model = &VoiceModel::sample().await.unwrap(); + let model = &crate::tokio::VoiceModel::sample().await.unwrap(); syntesizer.load_voice_model(model).await.unwrap(); let accent_phrases = match input { @@ -1774,8 +1790,10 @@ mod tests { #[rstest] #[tokio::test] async fn create_accent_phrases_works_for_japanese_commas_and_periods() { - let syntesizer = Synthesizer::new( - OpenJtalk::new(OPEN_JTALK_DIC_DIR).await.unwrap(), + let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) + .await + .unwrap(), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, ..Default::default() @@ -1783,7 +1801,7 @@ mod tests { ) .unwrap(); - let model = &VoiceModel::sample().await.unwrap(); + let model = &crate::tokio::VoiceModel::sample().await.unwrap(); syntesizer.load_voice_model(model).await.unwrap(); let accent_phrases = syntesizer @@ -1833,8 +1851,10 @@ mod tests { #[rstest] #[tokio::test] async fn mora_length_works() { - let syntesizer = Synthesizer::new( - OpenJtalk::new(OPEN_JTALK_DIC_DIR).await.unwrap(), + let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) + .await + .unwrap(), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, ..Default::default() @@ -1842,7 +1862,7 @@ mod tests { ) .unwrap(); - let model = &VoiceModel::sample().await.unwrap(); + let model = &crate::tokio::VoiceModel::sample().await.unwrap(); syntesizer.load_voice_model(model).await.unwrap(); let accent_phrases = syntesizer @@ -1869,8 +1889,10 @@ mod tests { #[rstest] #[tokio::test] async fn mora_pitch_works() { - let syntesizer = Synthesizer::new( - OpenJtalk::new(OPEN_JTALK_DIC_DIR).await.unwrap(), + let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) + .await + .unwrap(), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, ..Default::default() @@ -1878,7 +1900,7 @@ mod tests { ) .unwrap(); - let model = &VoiceModel::sample().await.unwrap(); + let model = &crate::tokio::VoiceModel::sample().await.unwrap(); syntesizer.load_voice_model(model).await.unwrap(); let accent_phrases = syntesizer @@ -1901,8 +1923,10 @@ mod tests { #[rstest] #[tokio::test] async fn mora_data_works() { - let syntesizer = Synthesizer::new( - OpenJtalk::new(OPEN_JTALK_DIC_DIR).await.unwrap(), + let syntesizer = super::tokio::Synthesizer::new( + crate::tokio::OpenJtalk::new(OPEN_JTALK_DIC_DIR) + .await + .unwrap(), &InitializeOptions { acceleration_mode: AccelerationMode::Cpu, ..Default::default() @@ -1910,7 +1934,7 @@ mod tests { ) .unwrap(); - let model = &VoiceModel::sample().await.unwrap(); + let model = &crate::tokio::VoiceModel::sample().await.unwrap(); syntesizer.load_voice_model(model).await.unwrap(); let accent_phrases = syntesizer diff --git a/crates/voicevox_core/src/test_util.rs b/crates/voicevox_core/src/test_util.rs index 822d7e7fe..926fe45bb 100644 --- a/crates/voicevox_core/src/test_util.rs +++ b/crates/voicevox_core/src/test_util.rs @@ -1,9 +1,9 @@ use std::path::PathBuf; -use crate::{Result, VoiceModel}; +use crate::Result; -pub async fn open_default_vvm_file() -> VoiceModel { - VoiceModel::from_path( +pub async fn open_default_vvm_file() -> crate::tokio::VoiceModel { + crate::tokio::VoiceModel::from_path( ::test_util::convert_zip_vvm( PathBuf::from(env!("CARGO_WORKSPACE_DIR")) .join(file!()) @@ -18,7 +18,7 @@ pub async fn open_default_vvm_file() -> VoiceModel { .unwrap() } -impl VoiceModel { +impl crate::tokio::VoiceModel { pub(crate) async fn sample() -> Result { return Self::from_path(PATH).await; diff --git a/crates/voicevox_core/src/tokio.rs b/crates/voicevox_core/src/tokio.rs new file mode 100644 index 000000000..49451a310 --- /dev/null +++ b/crates/voicevox_core/src/tokio.rs @@ -0,0 +1,6 @@ +//! Tokio版API。 + +pub use crate::{ + engine::open_jtalk::tokio::OpenJtalk, synthesizer::tokio::Synthesizer, + user_dict::dict::tokio::UserDict, voice_model::tokio::VoiceModel, +}; diff --git a/crates/voicevox_core/src/user_dict/dict.rs b/crates/voicevox_core/src/user_dict/dict.rs index 79534d9e3..28dec224c 100644 --- a/crates/voicevox_core/src/user_dict/dict.rs +++ b/crates/voicevox_core/src/user_dict/dict.rs @@ -5,14 +5,7 @@ use uuid::Uuid; use super::word::*; use crate::{error::ErrorRepr, Result}; -/// ユーザー辞書。 -/// 単語はJSONとの相互変換のために挿入された順序を保つ。 -#[derive(Debug, Default)] -pub struct UserDict { - words: std::sync::Mutex>, -} - -impl UserDict { +impl self::blocking::UserDict { /// ユーザー辞書を作成する。 pub fn new() -> Self { Default::default() @@ -31,13 +24,12 @@ impl UserDict { /// # Errors /// /// ファイルが読めなかった、または内容が不正だった場合はエラーを返す。 - pub async fn load(&self, store_path: &str) -> Result<()> { - let words = async { - let words = &fs_err::tokio::read(store_path).await?; + pub fn load(&self, store_path: &str) -> Result<()> { + let words = (|| { + let words = &fs_err::read(store_path)?; let words = serde_json::from_slice::>(words)?; Ok(words) - } - .await + })() .map_err(ErrorRepr::LoadUserDict)?; self.words.lock().unwrap().extend(words); @@ -78,12 +70,11 @@ impl UserDict { } /// ユーザー辞書を保存する。 - pub async fn save(&self, store_path: &str) -> Result<()> { - fs_err::tokio::write( + pub fn save(&self, store_path: &str) -> Result<()> { + fs_err::write( store_path, serde_json::to_vec(&self.words).expect("should not fail"), ) - .await .map_err(|e| ErrorRepr::SaveUserDict(e.into()).into()) } @@ -99,3 +90,86 @@ impl UserDict { ) } } + +impl self::tokio::UserDict { + /// ユーザー辞書を作成する。 + pub fn new() -> Self { + Self(self::blocking::UserDict::new().into()) + } + + pub fn to_json(&self) -> String { + self.0.to_json() + } + + pub fn with_words(&self, f: impl FnOnce(&IndexMap) -> R) -> R { + self.0.with_words(f) + } + + /// ユーザー辞書をファイルから読み込む。 + /// + /// # Errors + /// + /// ファイルが読めなかった、または内容が不正だった場合はエラーを返す。 + pub async fn load(&self, store_path: &str) -> Result<()> { + let blocking = self.0.clone(); + let store_path = store_path.to_owned(); + crate::task::asyncify(move || blocking.load(&store_path)).await + } + + /// ユーザー辞書に単語を追加する。 + pub fn add_word(&self, word: UserDictWord) -> Result { + self.0.add_word(word) + } + + /// ユーザー辞書の単語を変更する。 + pub fn update_word(&self, word_uuid: Uuid, new_word: UserDictWord) -> Result<()> { + self.0.update_word(word_uuid, new_word) + } + + /// ユーザー辞書から単語を削除する。 + pub fn remove_word(&self, word_uuid: Uuid) -> Result { + self.0.remove_word(word_uuid) + } + + /// 他のユーザー辞書をインポートする。 + pub fn import(&self, other: &Self) -> Result<()> { + self.0.import(&other.0) + } + + /// ユーザー辞書を保存する。 + pub async fn save(&self, store_path: &str) -> Result<()> { + let blocking = self.0.clone(); + let store_path = store_path.to_owned(); + crate::task::asyncify(move || blocking.save(&store_path)).await + } + + /// MeCabで使用する形式に変換する。 + pub(crate) fn to_mecab_format(&self) -> String { + self.0.to_mecab_format() + } +} + +pub(crate) mod blocking { + use indexmap::IndexMap; + use uuid::Uuid; + + use super::UserDictWord; + + /// ユーザー辞書。 + /// + /// 単語はJSONとの相互変換のために挿入された順序を保つ。 + #[derive(Debug, Default)] + pub struct UserDict { + pub(super) words: std::sync::Mutex>, + } +} + +pub(crate) mod tokio { + use std::sync::Arc; + + /// ユーザー辞書。 + /// + /// 単語はJSONとの相互変換のために挿入された順序を保つ。 + #[derive(Debug, Default)] + pub struct UserDict(pub(super) Arc); +} diff --git a/crates/voicevox_core/src/user_dict/mod.rs b/crates/voicevox_core/src/user_dict/mod.rs index 58def046f..1c6302e15 100644 --- a/crates/voicevox_core/src/user_dict/mod.rs +++ b/crates/voicevox_core/src/user_dict/mod.rs @@ -1,6 +1,5 @@ -mod dict; +pub(crate) mod dict; mod part_of_speech_data; mod word; -pub use dict::*; pub use word::*; diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs index 829bbf43d..63b7e77e1 100644 --- a/crates/voicevox_core/src/voice_model.rs +++ b/crates/voicevox_core/src/voice_model.rs @@ -1,13 +1,14 @@ -use async_zip::{read::fs::ZipFileReader, ZipEntry}; use enum_map::EnumMap; use futures::future::join3; +use ouroboros::self_referencing; +use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _}; use serde::{de::DeserializeOwned, Deserialize}; use super::*; use crate::infer::domain::InferenceOperationImpl; use std::{ collections::{BTreeMap, HashMap}, - io, + io::{self, Cursor}, path::{Path, PathBuf}, }; @@ -24,29 +25,118 @@ pub struct VoiceModelId { raw_voice_model_id: RawVoiceModelId, } -/// 音声モデル。 -/// -/// VVMファイルと対応する。 -#[derive(Getters, Clone)] -pub struct VoiceModel { +impl self::blocking::VoiceModel { + pub(crate) fn read_inference_models( + &self, + ) -> LoadModelResult>> { + let reader = BlockingVvmEntryReader::open(&self.header.path)?; + + let model_bytes = [ + self.header.manifest.decode_filename(), + self.header.manifest.predict_duration_filename(), + self.header.manifest.predict_intonation_filename(), + ] + .into_par_iter() + .map(|filename| reader.read_vvm_entry(filename)) + .collect::, _>>()? + .try_into() + .unwrap_or_else(|_| panic!("should be same length")); + + Ok(EnumMap::from_array(model_bytes)) + } + + /// VVMファイルから`VoiceModel`をコンストラクトする。 + pub fn from_path(path: impl AsRef) -> crate::Result { + let path = path.as_ref().to_owned(); + let reader = BlockingVvmEntryReader::open(&path)?; + let manifest = reader.read_vvm_json::("manifest.json")?; + let metas = reader.read_vvm_json(manifest.metas_filename())?; + let id = VoiceModelId::new(nanoid!()); + + Ok(Self { + header: VoiceModelHeader { + id, + metas, + manifest, + path, + }, + }) + } + /// ID。 - id: VoiceModelId, - manifest: Manifest, + pub fn id(&self) -> &VoiceModelId { + &self.header.id + } + /// メタ情報。 - metas: VoiceModelMeta, + pub fn metas(&self) -> &VoiceModelMeta { + &self.header.metas + } + + pub(crate) fn header(&self) -> &VoiceModelHeader { + &self.header + } +} + +#[self_referencing] +struct BlockingVvmEntryReader { path: PathBuf, + zip: Vec, + #[covariant] + #[borrows(zip)] + reader: zip::ZipArchive>, +} + +impl BlockingVvmEntryReader { + fn open(path: &Path) -> LoadModelResult { + (|| { + let zip = std::fs::read(path)?; + Self::try_new(path.to_owned(), zip, |zip| { + zip::ZipArchive::new(Cursor::new(zip)) + }) + })() + .map_err(|source| LoadModelError { + path: path.to_owned(), + context: LoadModelErrorKind::OpenZipFile, + source: Some(source.into()), + }) + } + + fn read_vvm_json(&self, filename: &str) -> LoadModelResult { + let bytes = &self.read_vvm_entry(filename)?; + serde_json::from_slice(bytes).map_err(|source| LoadModelError { + path: self.borrow_path().clone(), + context: LoadModelErrorKind::OpenZipFile, + source: Some(source.into()), + }) + } + + fn read_vvm_entry(&self, filename: &str) -> LoadModelResult> { + (|| { + let mut reader = self.borrow_reader().clone(); + let mut entry = reader.by_name(filename)?; + let mut buf = Vec::with_capacity(entry.size() as _); + io::copy(&mut entry, &mut buf)?; + Ok(buf) + })() + .map_err(|source| LoadModelError { + path: self.borrow_path().clone(), + context: LoadModelErrorKind::OpenZipFile, + source: Some(source), + }) + } } -impl VoiceModel { +impl self::tokio::VoiceModel { pub(crate) async fn read_inference_models( &self, ) -> LoadModelResult>> { - let reader = VvmEntryReader::open(&self.path).await?; + let reader = AsyncVvmEntryReader::open(&self.header.path).await?; let (decode_model_result, predict_duration_model_result, predict_intonation_model_result) = join3( - reader.read_vvm_entry(self.manifest.decode_filename()), - reader.read_vvm_entry(self.manifest.predict_duration_filename()), - reader.read_vvm_entry(self.manifest.predict_intonation_filename()), + reader.read_vvm_entry(self.header.manifest.decode_filename()), + reader.read_vvm_entry(self.header.manifest.predict_duration_filename()), + reader.read_vvm_entry(self.header.manifest.predict_intonation_filename()), ) .await; @@ -58,7 +148,7 @@ impl VoiceModel { } /// VVMファイルから`VoiceModel`をコンストラクトする。 pub async fn from_path(path: impl AsRef) -> Result { - let reader = VvmEntryReader::open(path.as_ref()).await?; + let reader = AsyncVvmEntryReader::open(path.as_ref()).await?; let manifest = reader.read_vvm_json::("manifest.json").await?; let metas = reader .read_vvm_json::(manifest.metas_filename()) @@ -66,48 +156,44 @@ impl VoiceModel { let id = VoiceModelId::new(nanoid!()); Ok(Self { - id, - metas, - manifest, - path: path.as_ref().into(), + header: VoiceModelHeader { + id, + metas, + manifest, + path: path.as_ref().into(), + }, }) } - /// モデル内のすべてのスタイルに対するモデル内IDを取得する。 - /// - /// モデル内IDのマッピングが存在しない場合はそのままスタイルIDを返す。 - pub(crate) fn model_inner_ids(&self) -> BTreeMap { - self.metas - .iter() - .flat_map(SpeakerMeta::styles) - .map(StyleMeta::id) - .map(|&style_id| { - let model_inner_id = self - .manifest - .style_id_to_model_inner_id() - .get(&style_id) - .copied() - .unwrap_or_else(|| ModelInnerId::new(style_id.raw_id())); - (style_id, model_inner_id) - }) - .collect() + /// ID。 + pub fn id(&self) -> &VoiceModelId { + &self.header.id + } + + /// メタ情報。 + pub fn metas(&self) -> &VoiceModelMeta { + &self.header.metas + } + + pub(crate) fn header(&self) -> &VoiceModelHeader { + &self.header } } -struct VvmEntry { +struct AsyncVvmEntry { index: usize, - entry: ZipEntry, + entry: async_zip::ZipEntry, } #[derive(new)] -struct VvmEntryReader { - reader: ZipFileReader, - entry_map: HashMap, +struct AsyncVvmEntryReader { + reader: async_zip::read::fs::ZipFileReader, + entry_map: HashMap, } -impl VvmEntryReader { +impl AsyncVvmEntryReader { async fn open(path: &Path) -> LoadModelResult { - let reader = ZipFileReader::new(path) + let reader = async_zip::read::fs::ZipFileReader::new(path) .await .map_err(|source| LoadModelError { path: path.to_owned(), @@ -123,14 +209,14 @@ impl VvmEntryReader { .map(|(i, e)| { ( e.entry().filename().to_string(), - VvmEntry { + AsyncVvmEntry { index: i, entry: e.entry().clone(), }, ) }) .collect(); - Ok(VvmEntryReader::new(reader, entry_map)) + Ok(AsyncVvmEntryReader::new(reader, entry_map)) } async fn read_vvm_json(&self, filename: &str) -> LoadModelResult { let bytes = self.read_vvm_entry(filename).await?; @@ -166,3 +252,59 @@ impl VvmEntryReader { }) } } + +#[derive(Clone)] +pub(crate) struct VoiceModelHeader { + /// ID。 + pub(crate) id: VoiceModelId, + manifest: Manifest, + /// メタ情報。 + pub(crate) metas: VoiceModelMeta, + pub(crate) path: PathBuf, +} + +impl VoiceModelHeader { + /// モデル内のすべてのスタイルに対するモデル内IDを取得する。 + /// + /// モデル内IDのマッピングが存在しない場合はそのままスタイルIDを返す。 + pub(crate) fn model_inner_ids(&self) -> BTreeMap { + self.metas + .iter() + .flat_map(SpeakerMeta::styles) + .map(StyleMeta::id) + .map(|&style_id| { + let model_inner_id = self + .manifest + .style_id_to_model_inner_id() + .get(&style_id) + .copied() + .unwrap_or_else(|| ModelInnerId::new(style_id.raw_id())); + (style_id, model_inner_id) + }) + .collect() + } +} + +pub(crate) mod blocking { + use super::VoiceModelHeader; + + /// 音声モデル。 + /// + /// VVMファイルと対応する。 + #[derive(Clone)] + pub struct VoiceModel { + pub(super) header: VoiceModelHeader, + } +} + +pub(crate) mod tokio { + use super::VoiceModelHeader; + + /// 音声モデル。 + /// + /// VVMファイルと対応する。 + #[derive(Clone)] + pub struct VoiceModel { + pub(super) header: VoiceModelHeader, + } +} diff --git a/crates/voicevox_core_c_api/src/c_impls.rs b/crates/voicevox_core_c_api/src/c_impls.rs index aa1421049..61665f48f 100644 --- a/crates/voicevox_core_c_api/src/c_impls.rs +++ b/crates/voicevox_core_c_api/src/c_impls.rs @@ -1,13 +1,13 @@ use std::{ffi::CString, path::Path}; -use voicevox_core::{InitializeOptions, OpenJtalk, Result, Synthesizer, VoiceModel, VoiceModelId}; +use voicevox_core::{InitializeOptions, Result, VoiceModelId}; use crate::{CApiResult, OpenJtalkRc, VoicevoxSynthesizer, VoicevoxVoiceModel}; impl OpenJtalkRc { pub(crate) async fn new(open_jtalk_dic_dir: impl AsRef) -> Result { Ok(Self { - open_jtalk: OpenJtalk::new(open_jtalk_dic_dir).await?, + open_jtalk: voicevox_core::tokio::OpenJtalk::new(open_jtalk_dic_dir).await?, }) } } @@ -18,11 +18,15 @@ impl VoicevoxSynthesizer { // FIXME: `into_result_code_with_error`を`run`とかに改名し、`init_logger`をその中に移動 let _ = *crate::RUNTIME; - let synthesizer = Synthesizer::new(open_jtalk.open_jtalk.clone(), options)?; + let synthesizer = + voicevox_core::tokio::Synthesizer::new(open_jtalk.open_jtalk.clone(), options)?; Ok(Self { synthesizer }) } - pub(crate) async fn load_voice_model(&self, model: &VoiceModel) -> CApiResult<()> { + pub(crate) async fn load_voice_model( + &self, + model: &voicevox_core::tokio::VoiceModel, + ) -> CApiResult<()> { self.synthesizer.load_voice_model(model).await?; Ok(()) } @@ -40,7 +44,7 @@ impl VoicevoxSynthesizer { impl VoicevoxVoiceModel { pub(crate) async fn from_path(path: impl AsRef) -> Result { - let model = VoiceModel::from_path(path).await?; + let model = voicevox_core::tokio::VoiceModel::from_path(path).await?; let id = CString::new(model.id().raw_voice_model_id().as_str()).unwrap(); let metas = CString::new(serde_json::to_string(model.metas()).unwrap()).unwrap(); Ok(Self { model, id, metas }) diff --git a/crates/voicevox_core_c_api/src/compatible_engine.rs b/crates/voicevox_core_c_api/src/compatible_engine.rs index cf19d9d3b..694753827 100644 --- a/crates/voicevox_core_c_api/src/compatible_engine.rs +++ b/crates/voicevox_core_c_api/src/compatible_engine.rs @@ -3,7 +3,7 @@ use std::collections::BTreeMap; use super::*; use libc::c_int; -use voicevox_core::{StyleId, VoiceModel, __internal::interop::PerformInference as _}; +use voicevox_core::{StyleId, __internal::interop::PerformInference as _}; macro_rules! ensure_initialized { ($synthesizer:expr $(,)?) => { @@ -20,10 +20,10 @@ macro_rules! ensure_initialized { static ERROR_MESSAGE: Lazy> = Lazy::new(|| Mutex::new(String::new())); struct VoiceModelSet { - all_vvms: Vec, + all_vvms: Vec, all_metas_json: CString, style_model_map: BTreeMap, - model_map: BTreeMap, + model_map: BTreeMap, } static VOICE_MODEL_SET: Lazy = Lazy::new(|| { @@ -52,7 +52,7 @@ static VOICE_MODEL_SET: Lazy = Lazy::new(|| { /// # Panics /// /// 失敗したらパニックする - async fn get_all_models() -> Vec { + async fn get_all_models() -> Vec { let root_dir = if let Some(root_dir) = env::var_os(ROOT_DIR_ENV_NAME) { root_dir.into() } else { @@ -70,7 +70,7 @@ static VOICE_MODEL_SET: Lazy = Lazy::new(|| { .unwrap_or_else(|e| panic!("{}が読めませんでした: {e}", root_dir.display())) .into_iter() .filter(|entry| entry.path().extension().map_or(false, |ext| ext == "vvm")) - .map(|entry| VoiceModel::from_path(entry.path())); + .map(|entry| voicevox_core::tokio::VoiceModel::from_path(entry.path())); futures::future::join_all(vvm_paths) .await @@ -88,10 +88,10 @@ fn voice_model_set() -> &'static VoiceModelSet { &VOICE_MODEL_SET } -static SYNTHESIZER: Lazy>>> = +static SYNTHESIZER: Lazy>>> = Lazy::new(|| Mutex::new(None)); -fn lock_synthesizer() -> MutexGuard<'static, Option>> { +fn lock_synthesizer() -> MutexGuard<'static, Option>> { SYNTHESIZER.lock().unwrap() } @@ -107,7 +107,7 @@ pub extern "C" fn initialize(use_gpu: bool, cpu_num_threads: c_int, load_all_mod // FIXME: ここはもう`RUNTIME.block_on`で包む必要は無くなっているのだが、ロガーの設定を`RUNTIME` // で行っているという構造になってしまっているので、外すとロガーの初期化が遅れてしまでう let result = RUNTIME.block_on(async { - let synthesizer = voicevox_core::Synthesizer::new( + let synthesizer = voicevox_core::tokio::Synthesizer::new( (), &voicevox_core::InitializeOptions { acceleration_mode: if use_gpu { diff --git a/crates/voicevox_core_c_api/src/lib.rs b/crates/voicevox_core_c_api/src/lib.rs index b9e50ba5d..1be2cafc6 100644 --- a/crates/voicevox_core_c_api/src/lib.rs +++ b/crates/voicevox_core_c_api/src/lib.rs @@ -29,11 +29,8 @@ use tokio::runtime::Runtime; use tracing_subscriber::fmt::format::Writer; use tracing_subscriber::EnvFilter; use uuid::Uuid; -use voicevox_core::{ - AccentPhraseModel, AudioQueryModel, OpenJtalk, TtsOptions, UserDictWord, VoiceModel, - VoiceModelId, -}; -use voicevox_core::{StyleId, SupportedDevices, SynthesisOptions, Synthesizer}; +use voicevox_core::{AccentPhraseModel, AudioQueryModel, TtsOptions, UserDictWord, VoiceModelId}; +use voicevox_core::{StyleId, SupportedDevices, SynthesisOptions}; static RUNTIME: Lazy = Lazy::new(|| { let _ = init_logger(); @@ -104,7 +101,7 @@ static RUNTIME: Lazy = Lazy::new(|| { /// ``` /// } pub struct OpenJtalkRc { - open_jtalk: OpenJtalk, + open_jtalk: voicevox_core::tokio::OpenJtalk, } /// ::OpenJtalkRc を構築(_construct_)する。 @@ -231,7 +228,7 @@ pub extern "C" fn voicevox_get_version() -> *const c_char { /// 構築(_construction_)は ::voicevox_voice_model_new_from_path で行い、破棄(_destruction_)は ::voicevox_voice_model_delete で行う。 #[derive(Getters)] pub struct VoicevoxVoiceModel { - model: VoiceModel, + model: voicevox_core::tokio::VoiceModel, id: CString, metas: CString, } @@ -317,7 +314,7 @@ pub extern "C" fn voicevox_voice_model_delete(model: Box) { /// 構築(_construction_)は ::voicevox_synthesizer_new で行い、破棄(_destruction_)は ::voicevox_synthesizer_delete で行う。 #[derive(Getters)] pub struct VoicevoxSynthesizer { - synthesizer: Synthesizer, + synthesizer: voicevox_core::tokio::Synthesizer, } /// ::VoicevoxSynthesizer を構築(_construct_)する。 @@ -1034,7 +1031,7 @@ pub extern "C" fn voicevox_error_result_to_message( /// ユーザー辞書。 #[derive(Default)] pub struct VoicevoxUserDict { - dict: Arc, + dict: Arc, } /// ユーザー辞書の単語。 diff --git a/crates/voicevox_core_java_api/src/open_jtalk.rs b/crates/voicevox_core_java_api/src/open_jtalk.rs index 422dd0fa8..b2242e984 100644 --- a/crates/voicevox_core_java_api/src/open_jtalk.rs +++ b/crates/voicevox_core_java_api/src/open_jtalk.rs @@ -16,7 +16,8 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_OpenJtalk_rsNew<'local> let open_jtalk_dict_dir = env.get_string(&open_jtalk_dict_dir)?; let open_jtalk_dict_dir = &*Cow::from(&open_jtalk_dict_dir); - let internal = RUNTIME.block_on(voicevox_core::OpenJtalk::new(open_jtalk_dict_dir))?; + let internal = + RUNTIME.block_on(voicevox_core::tokio::OpenJtalk::new(open_jtalk_dict_dir))?; env.set_rust_field(&this, "handle", internal)?; Ok(()) @@ -31,11 +32,11 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_OpenJtalk_rsUseUserDict ) { throw_if_err(env, (), |env| { let internal = env - .get_rust_field::<_, _, voicevox_core::OpenJtalk>(&this, "handle")? + .get_rust_field::<_, _, voicevox_core::tokio::OpenJtalk>(&this, "handle")? .clone(); let user_dict = env - .get_rust_field::<_, _, Arc>(&user_dict, "handle")? + .get_rust_field::<_, _, Arc>(&user_dict, "handle")? .clone(); RUNTIME.block_on(internal.use_user_dict(&user_dict))?; diff --git a/crates/voicevox_core_java_api/src/synthesizer.rs b/crates/voicevox_core_java_api/src/synthesizer.rs index 8828c6e78..e36e0b9c2 100644 --- a/crates/voicevox_core_java_api/src/synthesizer.rs +++ b/crates/voicevox_core_java_api/src/synthesizer.rs @@ -50,9 +50,10 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsNew<'loca options.cpu_num_threads = cpu_num_threads.i().expect("cpuNumThreads is not integer") as u16; let open_jtalk = env - .get_rust_field::<_, _, voicevox_core::OpenJtalk>(&open_jtalk, "handle")? + .get_rust_field::<_, _, voicevox_core::tokio::OpenJtalk>(&open_jtalk, "handle")? .clone(); - let internal = voicevox_core::Synthesizer::new(open_jtalk, Box::leak(Box::new(options)))?; + let internal = + voicevox_core::tokio::Synthesizer::new(open_jtalk, Box::leak(Box::new(options)))?; env.set_rust_field(&this, "handle", internal)?; Ok(()) }) @@ -64,7 +65,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsIsGpuMode ) -> jboolean { throw_if_err(env, false, |env| { let internal = env - .get_rust_field::<_, _, voicevox_core::Synthesizer>( + .get_rust_field::<_, _, voicevox_core::tokio::Synthesizer>( &this, "handle", )? .clone(); @@ -80,7 +81,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsGetMetasJ ) -> jobject { throw_if_err(env, std::ptr::null_mut(), |env| { let internal = env - .get_rust_field::<_, _, voicevox_core::Synthesizer>( + .get_rust_field::<_, _, voicevox_core::tokio::Synthesizer>( &this, "handle", )? .clone(); @@ -101,10 +102,10 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsLoadVoice ) { throw_if_err(env, (), |env| { let model = env - .get_rust_field::<_, _, Arc>(&model, "handle")? + .get_rust_field::<_, _, Arc>(&model, "handle")? .clone(); let internal = env - .get_rust_field::<_, _, voicevox_core::Synthesizer>( + .get_rust_field::<_, _, voicevox_core::tokio::Synthesizer>( &this, "handle", )? .clone(); @@ -123,7 +124,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsUnloadVoi let model_id: String = env.get_string(&model_id)?.into(); let internal = env - .get_rust_field::<_, _, voicevox_core::Synthesizer>( + .get_rust_field::<_, _, voicevox_core::tokio::Synthesizer>( &this, "handle", )? .clone(); @@ -146,7 +147,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsIsLoadedV let model_id: String = env.get_string(&model_id)?.into(); let internal = env - .get_rust_field::<_, _, voicevox_core::Synthesizer>( + .get_rust_field::<_, _, voicevox_core::tokio::Synthesizer>( &this, "handle", )? .clone(); @@ -172,7 +173,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsAudioQuer let style_id = style_id as u32; let internal = env - .get_rust_field::<_, _, voicevox_core::Synthesizer>( + .get_rust_field::<_, _, voicevox_core::tokio::Synthesizer>( &this, "handle", )? .clone(); @@ -201,7 +202,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsAudioQuer let style_id = style_id as u32; let internal = env - .get_rust_field::<_, _, voicevox_core::Synthesizer>( + .get_rust_field::<_, _, voicevox_core::tokio::Synthesizer>( &this, "handle", )? .clone(); @@ -231,7 +232,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsAccentPhr let style_id = style_id as u32; let internal = env - .get_rust_field::<_, _, voicevox_core::Synthesizer>( + .get_rust_field::<_, _, voicevox_core::tokio::Synthesizer>( &this, "handle", )? .clone(); @@ -260,7 +261,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsAccentPhr let style_id = style_id as u32; let internal = env - .get_rust_field::<_, _, voicevox_core::Synthesizer>( + .get_rust_field::<_, _, voicevox_core::tokio::Synthesizer>( &this, "handle", )? .clone(); @@ -291,7 +292,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsReplaceMo let style_id = style_id as u32; let internal = env - .get_rust_field::<_, _, voicevox_core::Synthesizer>( + .get_rust_field::<_, _, voicevox_core::tokio::Synthesizer>( &this, "handle", )? .clone(); @@ -323,7 +324,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsReplacePh let style_id = style_id as u32; let internal = env - .get_rust_field::<_, _, voicevox_core::Synthesizer>( + .get_rust_field::<_, _, voicevox_core::tokio::Synthesizer>( &this, "handle", )? .clone(); @@ -356,7 +357,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsReplaceMo let style_id = style_id as u32; let internal = env - .get_rust_field::<_, _, voicevox_core::Synthesizer>( + .get_rust_field::<_, _, voicevox_core::tokio::Synthesizer>( &this, "handle", )? .clone(); @@ -387,7 +388,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsSynthesis let style_id = style_id as u32; let internal = env - .get_rust_field::<_, _, voicevox_core::Synthesizer>( + .get_rust_field::<_, _, voicevox_core::tokio::Synthesizer>( &this, "handle", )? .clone(); @@ -423,7 +424,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsTtsFromKa let style_id = style_id as u32; let internal = env - .get_rust_field::<_, _, voicevox_core::Synthesizer>( + .get_rust_field::<_, _, voicevox_core::tokio::Synthesizer>( &this, "handle", )? .clone(); @@ -459,7 +460,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_Synthesizer_rsTts<'loca let style_id = style_id as u32; let internal = env - .get_rust_field::<_, _, voicevox_core::Synthesizer>( + .get_rust_field::<_, _, voicevox_core::tokio::Synthesizer>( &this, "handle", )? .clone(); diff --git a/crates/voicevox_core_java_api/src/user_dict.rs b/crates/voicevox_core_java_api/src/user_dict.rs index abc90253f..7eb2722f4 100644 --- a/crates/voicevox_core_java_api/src/user_dict.rs +++ b/crates/voicevox_core_java_api/src/user_dict.rs @@ -14,7 +14,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_UserDict_rsNew<'local>( this: JObject<'local>, ) { throw_if_err(env, (), |env| { - let internal = voicevox_core::UserDict::new(); + let internal = voicevox_core::tokio::UserDict::new(); env.set_rust_field(&this, "handle", Arc::new(internal))?; @@ -30,7 +30,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_UserDict_rsAddWord<'loc ) -> jobject { throw_if_err(env, std::ptr::null_mut(), |env| { let internal = env - .get_rust_field::<_, _, Arc>(&this, "handle")? + .get_rust_field::<_, _, Arc>(&this, "handle")? .clone(); let word_json = env.get_string(&word_json)?; @@ -55,7 +55,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_UserDict_rsUpdateWord<' ) { throw_if_err(env, (), |env| { let internal = env - .get_rust_field::<_, _, Arc>(&this, "handle")? + .get_rust_field::<_, _, Arc>(&this, "handle")? .clone(); let uuid = env.get_string(&uuid)?; @@ -80,7 +80,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_UserDict_rsRemoveWord<' ) { throw_if_err(env, (), |env| { let internal = env - .get_rust_field::<_, _, Arc>(&this, "handle")? + .get_rust_field::<_, _, Arc>(&this, "handle")? .clone(); let uuid = env.get_string(&uuid)?; @@ -100,10 +100,10 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_UserDict_rsImportDict<' ) { throw_if_err(env, (), |env| { let internal = env - .get_rust_field::<_, _, Arc>(&this, "handle")? + .get_rust_field::<_, _, Arc>(&this, "handle")? .clone(); let other_dict = env - .get_rust_field::<_, _, Arc>(&other_dict, "handle")? + .get_rust_field::<_, _, Arc>(&other_dict, "handle")? .clone(); internal.import(&other_dict)?; @@ -120,7 +120,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_UserDict_rsLoad<'local> ) { throw_if_err(env, (), |env| { let internal = env - .get_rust_field::<_, _, Arc>(&this, "handle")? + .get_rust_field::<_, _, Arc>(&this, "handle")? .clone(); let path = env.get_string(&path)?; @@ -140,7 +140,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_UserDict_rsSave<'local> ) { throw_if_err(env, (), |env| { let internal = env - .get_rust_field::<_, _, Arc>(&this, "handle")? + .get_rust_field::<_, _, Arc>(&this, "handle")? .clone(); let path = env.get_string(&path)?; @@ -159,7 +159,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_UserDict_rsGetWords<'lo ) -> jobject { throw_if_err(env, std::ptr::null_mut(), |env| { let internal = env - .get_rust_field::<_, _, Arc>(&this, "handle")? + .get_rust_field::<_, _, Arc>(&this, "handle")? .clone(); let words = internal.to_json(); diff --git a/crates/voicevox_core_java_api/src/voice_model.rs b/crates/voicevox_core_java_api/src/voice_model.rs index cd971a1f7..d0ede9365 100644 --- a/crates/voicevox_core_java_api/src/voice_model.rs +++ b/crates/voicevox_core_java_api/src/voice_model.rs @@ -17,7 +17,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_VoiceModel_rsFromPath<' let model_path = env.get_string(&model_path)?; let model_path = &*Cow::from(&model_path); - let internal = RUNTIME.block_on(voicevox_core::VoiceModel::from_path(model_path))?; + let internal = RUNTIME.block_on(voicevox_core::tokio::VoiceModel::from_path(model_path))?; env.set_rust_field(&this, "handle", Arc::new(internal))?; @@ -32,7 +32,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_VoiceModel_rsGetId<'loc ) -> jobject { throw_if_err(env, std::ptr::null_mut(), |env| { let internal = env - .get_rust_field::<_, _, Arc>(&this, "handle")? + .get_rust_field::<_, _, Arc>(&this, "handle")? .clone(); let id = internal.id().raw_voice_model_id(); @@ -50,7 +50,7 @@ unsafe extern "system" fn Java_jp_hiroshiba_voicevoxcore_VoiceModel_rsGetMetasJs ) -> jobject { throw_if_err(env, std::ptr::null_mut(), |env| { let internal = env - .get_rust_field::<_, _, Arc>(&this, "handle")? + .get_rust_field::<_, _, Arc>(&this, "handle")? .clone(); let metas = internal.metas(); diff --git a/crates/voicevox_core_python_api/src/lib.rs b/crates/voicevox_core_python_api/src/lib.rs index 956c924b2..e841afda1 100644 --- a/crates/voicevox_core_python_api/src/lib.rs +++ b/crates/voicevox_core_python_api/src/lib.rs @@ -73,7 +73,7 @@ exceptions! { #[pyclass] #[derive(Clone)] struct VoiceModel { - model: voicevox_core::VoiceModel, + model: voicevox_core::tokio::VoiceModel, } #[pyfunction] @@ -94,7 +94,7 @@ impl VoiceModel { #[pyo3(from_py_with = "from_utf8_path")] path: String, ) -> PyResult<&PyAny> { pyo3_asyncio::tokio::future_into_py(py, async move { - let model = voicevox_core::VoiceModel::from_path(path).await; + let model = voicevox_core::tokio::VoiceModel::from_path(path).await; let model = Python::with_gil(|py| model.into_py_result(py))?; Ok(Self { model }) }) @@ -114,7 +114,7 @@ impl VoiceModel { #[pyclass] #[derive(Clone)] struct OpenJtalk { - open_jtalk: voicevox_core::OpenJtalk, + open_jtalk: voicevox_core::tokio::OpenJtalk, } #[pymethods] @@ -126,7 +126,7 @@ impl OpenJtalk { py: Python<'_>, ) -> PyResult<&PyAny> { pyo3_asyncio::tokio::future_into_py(py, async move { - let open_jtalk = voicevox_core::OpenJtalk::new(open_jtalk_dict_dir).await; + let open_jtalk = voicevox_core::tokio::OpenJtalk::new(open_jtalk_dict_dir).await; let open_jtalk = Python::with_gil(|py| open_jtalk.into_py_result(py))?; Ok(Self { open_jtalk }) }) @@ -144,7 +144,7 @@ impl OpenJtalk { #[pyclass] struct Synthesizer { - synthesizer: Closable, Self>, + synthesizer: Closable, Self>, } #[pymethods] @@ -160,7 +160,7 @@ impl Synthesizer { #[pyo3(from_py_with = "from_acceleration_mode")] acceleration_mode: AccelerationMode, cpu_num_threads: u16, ) -> PyResult { - let synthesizer = voicevox_core::Synthesizer::new( + let synthesizer = voicevox_core::tokio::Synthesizer::new( open_jtalk.open_jtalk.clone(), &InitializeOptions { acceleration_mode, @@ -530,7 +530,7 @@ fn _to_zenkaku(text: &str) -> PyResult { #[pyclass] #[derive(Default, Debug, Clone)] struct UserDict { - dict: Arc, + dict: Arc, } #[pymethods] From d166174cbdd67e1cbb56bc086e5d57ccfa981c4b Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Sun, 3 Dec 2023 04:35:57 +0900 Subject: [PATCH 2/4] =?UTF-8?q?`blocking::VoiceModel::read=5Finference=5Fm?= =?UTF-8?q?odels`=E3=81=AE=E9=A0=86=E7=95=AA=E3=82=92=E9=96=93=E9=81=95?= =?UTF-8?q?=E3=81=88=E3=81=A6=E3=81=84=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/voice_model.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs index 63b7e77e1..b1773db1f 100644 --- a/crates/voicevox_core/src/voice_model.rs +++ b/crates/voicevox_core/src/voice_model.rs @@ -32,9 +32,9 @@ impl self::blocking::VoiceModel { let reader = BlockingVvmEntryReader::open(&self.header.path)?; let model_bytes = [ - self.header.manifest.decode_filename(), self.header.manifest.predict_duration_filename(), self.header.manifest.predict_intonation_filename(), + self.header.manifest.decode_filename(), ] .into_par_iter() .map(|filename| reader.read_vvm_entry(filename)) From 8082da8e2cd3c3f8495d9ea4141397a673680e1d Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Mon, 4 Dec 2023 03:41:16 +0900 Subject: [PATCH 3/4] =?UTF-8?q?`TextAnalyzer`=20=E2=86=92=20`FullcontextEx?= =?UTF-8?q?tractor`?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/engine/full_context_label.rs | 4 ++-- crates/voicevox_core/src/engine/mod.rs | 2 +- crates/voicevox_core/src/engine/open_jtalk.rs | 8 ++++---- crates/voicevox_core/src/lib.rs | 2 +- crates/voicevox_core/src/synthesizer.rs | 11 ++++++----- 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/crates/voicevox_core/src/engine/full_context_label.rs b/crates/voicevox_core/src/engine/full_context_label.rs index 978d85d6f..efe419579 100644 --- a/crates/voicevox_core/src/engine/full_context_label.rs +++ b/crates/voicevox_core/src/engine/full_context_label.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; -use crate::engine::open_jtalk::TextAnalyzer; +use crate::engine::open_jtalk::FullcontextExtractor; use derive_getters::Getters; use derive_new::new; use once_cell::sync::Lazy; @@ -317,7 +317,7 @@ impl Utterance { } pub(crate) fn extract_full_context_label( - open_jtalk: &impl TextAnalyzer, + open_jtalk: &impl FullcontextExtractor, text: impl AsRef, ) -> Result { let labels = open_jtalk diff --git a/crates/voicevox_core/src/engine/mod.rs b/crates/voicevox_core/src/engine/mod.rs index af117f37d..505fc5033 100644 --- a/crates/voicevox_core/src/engine/mod.rs +++ b/crates/voicevox_core/src/engine/mod.rs @@ -10,4 +10,4 @@ pub use self::full_context_label::*; pub use self::kana_parser::*; pub use self::model::*; pub(crate) use self::mora_list::mora2text; -pub use self::open_jtalk::TextAnalyzer; +pub use self::open_jtalk::FullcontextExtractor; diff --git a/crates/voicevox_core/src/engine/open_jtalk.rs b/crates/voicevox_core/src/engine/open_jtalk.rs index 6cdf1b3e8..b64440126 100644 --- a/crates/voicevox_core/src/engine/open_jtalk.rs +++ b/crates/voicevox_core/src/engine/open_jtalk.rs @@ -131,11 +131,11 @@ impl self::blocking::Inner { } } -pub trait TextAnalyzer: Clone + Send + Sync + 'static { +pub trait FullcontextExtractor: Clone + Send + Sync + 'static { fn extract_fullcontext(&self, text: &str) -> anyhow::Result>; } -impl TextAnalyzer for self::blocking::OpenJtalk { +impl FullcontextExtractor for self::blocking::OpenJtalk { fn extract_fullcontext(&self, text: &str) -> anyhow::Result> { let Resources { mecab, @@ -185,7 +185,7 @@ impl TextAnalyzer for self::blocking::OpenJtalk { } } -impl TextAnalyzer for self::tokio::OpenJtalk { +impl FullcontextExtractor for self::tokio::OpenJtalk { fn extract_fullcontext(&self, text: &str) -> anyhow::Result> { self.0.extract_fullcontext(text) } @@ -219,7 +219,7 @@ mod tests { use crate::macros::tests::assert_debug_fmt_eq; - use super::{OpenjtalkFunctionError, TextAnalyzer as _}; + use super::{FullcontextExtractor as _, OpenjtalkFunctionError}; fn testdata_hello_hiho() -> Vec { // こんにちは、ヒホです。の期待値 diff --git a/crates/voicevox_core/src/lib.rs b/crates/voicevox_core/src/lib.rs index 693d34b35..6bbf7866a 100644 --- a/crates/voicevox_core/src/lib.rs +++ b/crates/voicevox_core/src/lib.rs @@ -26,7 +26,7 @@ mod test_util; #[cfg(test)] use self::test_util::*; -pub use self::engine::{AccentPhraseModel, AudioQueryModel, TextAnalyzer}; +pub use self::engine::{AccentPhraseModel, AudioQueryModel, FullcontextExtractor}; pub use self::error::*; pub use self::metas::*; pub use self::result::*; diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs index e565f2599..c178a0ddd 100644 --- a/crates/voicevox_core/src/synthesizer.rs +++ b/crates/voicevox_core/src/synthesizer.rs @@ -4,7 +4,8 @@ use enum_map::enum_map; use crate::{ engine::{ - create_kana, parse_kana, AccentPhraseModel, MoraModel, OjtPhoneme, TextAnalyzer, Utterance, + create_kana, parse_kana, AccentPhraseModel, FullcontextExtractor, MoraModel, OjtPhoneme, + Utterance, }, infer::{ domain::{ @@ -210,7 +211,7 @@ impl self::tokio::Synthesizer { } } -impl self::tokio::Synthesizer { +impl self::tokio::Synthesizer { pub async fn create_accent_phrases( &self, text: &str, @@ -604,7 +605,7 @@ impl self::blocking::Synthesizer { } } -impl self::blocking::Synthesizer { +impl self::blocking::Synthesizer { /// 日本語のテキストからAccentPhrase (アクセント句)の配列を生成する。 /// /// # Example @@ -944,7 +945,7 @@ impl self::blocking::Synthesizer { } } -impl self::blocking::Synthesizer { +impl self::blocking::Synthesizer { /// 日本語のテキストから[AudioQuery]を生成する。 /// /// # Examples @@ -989,7 +990,7 @@ impl self::blocking::Synthesizer { } } -impl self::blocking::Synthesizer { +impl self::blocking::Synthesizer { /// 日本語のテキストから音声合成を行う。 pub fn tts(&self, text: &str, style_id: StyleId, options: &TtsOptions) -> Result> { let audio_query = &self.audio_query(text, style_id)?; From d04ed553ddcd11d7d4429d300816ce1a5ccb545e Mon Sep 17 00:00:00 2001 From: Ryo Yamashita Date: Mon, 4 Dec 2023 03:57:04 +0900 Subject: [PATCH 4/4] =?UTF-8?q?`VoiceModelHeader`=E3=81=AB=E3=82=B3?= =?UTF-8?q?=E3=83=A1=E3=83=B3=E3=83=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/voice_model.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs index b1773db1f..bee10e343 100644 --- a/crates/voicevox_core/src/voice_model.rs +++ b/crates/voicevox_core/src/voice_model.rs @@ -253,6 +253,10 @@ impl AsyncVvmEntryReader { } } +// FIXME: "header"といいつつ、VVMのファイルパスを持っている状態になっている。 +/// 音声モデルが持つ、各モデルファイルの実体を除く情報。 +/// +/// モデルの`[u8]`と分けて`Status`に渡す。 #[derive(Clone)] pub(crate) struct VoiceModelHeader { /// ID。