From 9c3a94e663263595e9e9df0c0b763620c0e1615c Mon Sep 17 00:00:00 2001
From: Ryo Yamashita
Date: Fri, 24 May 2024 08:00:24 +0900
Subject: [PATCH] =?UTF-8?q?change:=20`style=5Fid=5Fto=5Fmodel=5Finner=5Fid?=
 =?UTF-8?q?`=20=E2=86=92=20`style=5Fid=5Fto=5Finner=5Fvoice=5Fid`=20(#795)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
 crates/voicevox_core/src/manifest.rs    | 16 ++++----
 crates/voicevox_core/src/status.rs      | 50 ++++++++++++-------------
 crates/voicevox_core/src/synthesizer.rs | 12 +++---
 crates/voicevox_core/src/voice_model.rs | 24 ++++++------
 model/sample.vvm/manifest.json          |  2 +-
 5 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/crates/voicevox_core/src/manifest.rs b/crates/voicevox_core/src/manifest.rs
index 3b17ae3f1..a22b66e8b 100644
--- a/crates/voicevox_core/src/manifest.rs
+++ b/crates/voicevox_core/src/manifest.rs
@@ -19,18 +19,18 @@ impl Display for ManifestVersion {
 }
 
 /// モデル内IDの実体
-pub type RawModelInnerId = u32;
+pub type RawInnerVoiceId = u32;
 /// モデル内ID
 #[derive(PartialEq, Eq, Clone, Copy, Ord, PartialOrd, Deserialize, Serialize, new, Debug)]
-pub struct ModelInnerId(RawModelInnerId);
+pub struct InnerVoiceId(RawInnerVoiceId);
 
-impl ModelInnerId {
-    pub fn raw_id(self) -> RawModelInnerId {
+impl InnerVoiceId {
+    pub fn raw_id(self) -> RawInnerVoiceId {
         self.0
     }
 }
 
-impl Display for ModelInnerId {
+impl Display for InnerVoiceId {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         write!(f, "{}", self.raw_id())
     }
@@ -58,12 +58,12 @@ pub(crate) struct TalkManifest {
     pub(crate) predict_intonation_filename: String,
     pub(crate) decode_filename: String,
     #[serde(default)]
-    pub(crate) style_id_to_model_inner_id: StyleIdToModelInnerId,
+    pub(crate) style_id_to_inner_voice_id: StyleIdToInnerVoiceId,
 }
 
 #[serde_as]
 #[derive(Default, Clone, Deref, Deserialize)]
 #[deref(forward)]
-pub(crate) struct StyleIdToModelInnerId(
-    #[serde_as(as = "Arc<BTreeMap<DisplayFromStr, _>>")] Arc<BTreeMap<StyleId, ModelInnerId>>,
+pub(crate) struct StyleIdToInnerVoiceId(
+    #[serde_as(as = "Arc<BTreeMap<DisplayFromStr, _>>")] Arc<BTreeMap<StyleId, InnerVoiceId>>,
 );
diff --git a/crates/voicevox_core/src/status.rs b/crates/voicevox_core/src/status.rs
index a47de689b..6980ab7fc 100644
--- a/crates/voicevox_core/src/status.rs
+++ b/crates/voicevox_core/src/status.rs
@@ -14,9 +14,9 @@ use crate::{
         InferenceDomain, InferenceInputSignature, InferenceRuntime, InferenceSessionOptions,
         InferenceSignature,
     },
-    manifest::{ModelInnerId, StyleIdToModelInnerId},
+    manifest::{InnerVoiceId, StyleIdToInnerVoiceId},
     metas::{self, SpeakerMeta, StyleId, StyleMeta, VoiceModelMeta},
-    voice_model::{ModelBytesWithInnerIdsByDomain, VoiceModelHeader, VoiceModelId},
+    voice_model::{ModelBytesWithInnerVoiceIdsByDomain, VoiceModelHeader, VoiceModelId},
     Result,
 };
 
@@ -36,7 +36,7 @@ impl<R: InferenceRuntime> Status<R> {
     pub(crate) fn insert_model(
         &self,
         model_header: &VoiceModelHeader,
-        model_contents: &InferenceDomainMap<ModelBytesWithInnerIdsByDomain>,
+        model_contents: &InferenceDomainMap<ModelBytesWithInnerVoiceIdsByDomain>,
     ) -> Result<()> {
         self.loaded_models
             .lock()
@@ -66,14 +66,14 @@ impl<R: InferenceRuntime> Status<R> {
         self.loaded_models.lock().unwrap().metas()
     }
 
-    /// あるスタイルに対応する`VoiceModelId`と`ModelInnerId`の組を返す。
+    /// あるスタイルに対応する`VoiceModelId`と`InnerVoiceId`の組を返す。
     ///
-    /// `StyleId` → `ModelInnerId`のマッピングが存在しない場合は、`ModelInnerId`としては
+    /// `StyleId` → `InnerVoiceId`のマッピングが存在しない場合は、`InnerVoiceId`としては
     /// `style_id`と同じ値を返す。
     pub(crate) fn ids_for<D: InferenceDomainExt>(
         &self,
         style_id: StyleId,
-    ) -> Result<(VoiceModelId, ModelInnerId)> {
+    ) -> Result<(VoiceModelId, InnerVoiceId)> {
         self.loaded_models.lock().unwrap().ids_for::<D>(style_id)
     }
 
@@ -122,7 +122,7 @@ struct LoadedModels<R: InferenceRuntime>(IndexMap<VoiceModelId, LoadedModel<R>>);
 struct LoadedModel<R: InferenceRuntime> {
     metas: VoiceModelMeta,
-    session_sets_with_inner_ids: InferenceDomainMap<SessionSetsWithInnerIdsByDomain<R>>,
+    session_sets_with_inner_ids: InferenceDomainMap<SessionSetsWithInnerVoiceIdsByDomain<R>>,
 }
 
 impl<R: InferenceRuntime> LoadedModels<R> {
@@ -133,7 +133,7 @@ impl<R: InferenceRuntime> LoadedModels<R> {
     fn ids_for<D: InferenceDomainExt>(
         &self,
         style_id: StyleId,
-    ) -> Result<(VoiceModelId, ModelInnerId)> {
+    ) -> Result<(VoiceModelId, InnerVoiceId)> {
         let (
             model_id,
             LoadedModel {
@@ -153,13 +153,13 @@ impl<R: InferenceRuntime> LoadedModels<R> {
                 style_types: D::style_types(),
             })?;
 
-        let model_inner_id = session_sets_with_inner_ids
+        let inner_voice_id = session_sets_with_inner_ids
             .get::<D>()
             .as_ref()
-            .and_then(|(model_inner_ids, _)| model_inner_ids.get(&style_id).copied())
-            .unwrap_or_else(|| ModelInnerId::new(style_id.raw_id()));
+            .and_then(|(inner_voice_ids, _)| inner_voice_ids.get(&style_id).copied())
+            .unwrap_or_else(|| InnerVoiceId::new(style_id.raw_id()));
 
-        Ok((model_id.clone(), model_inner_id))
+        Ok((model_id.clone(), inner_voice_id))
     }
 
     /// # Panics
@@ -250,7 +250,7 @@ impl<R: InferenceRuntime> LoadedModels<R> {
     fn insert(
         &mut self,
         model_header: &VoiceModelHeader,
-        session_sets_with_inner_ids: InferenceDomainMap<SessionSetsWithInnerIdsByDomain<R>>,
+        session_sets_with_inner_ids: InferenceDomainMap<SessionSetsWithInnerVoiceIdsByDomain<R>>,
     ) -> Result<()> {
         self.ensure_acceptable(model_header)?;
 
@@ -286,8 +286,8 @@ impl<R: InferenceRuntime> LoadedModels<R> {
 
 pub(crate) trait InferenceDomainExt: InferenceDomain {
     fn visit<R: InferenceRuntime>(
-        map: &InferenceDomainMap<SessionSetsWithInnerIdsByDomain<R>>,
-    ) -> Option<&(StyleIdToModelInnerId, InferenceSessionSet<R, Self>)>;
+        map: &InferenceDomainMap<SessionSetsWithInnerVoiceIdsByDomain<R>>,
+    ) -> Option<&(StyleIdToInnerVoiceId, InferenceSessionSet<R, Self>)>;
 }
 
 #[duplicate_item(
@@ -296,25 +296,25 @@ pub(crate) trait InferenceDomainExt: InferenceDomain {
 )]
 impl InferenceDomainExt for T {
     fn visit<R: InferenceRuntime>(
-        map: &InferenceDomainMap<SessionSetsWithInnerIdsByDomain<R>>,
-    ) -> Option<&(StyleIdToModelInnerId, InferenceSessionSet<R, Self>)> {
+        map: &InferenceDomainMap<SessionSetsWithInnerVoiceIdsByDomain<R>>,
+    ) -> Option<&(StyleIdToInnerVoiceId, InferenceSessionSet<R, Self>)> {
         map.field.as_ref()
     }
 }
 
-impl<R: InferenceRuntime> InferenceDomainMap<SessionSetsWithInnerIdsByDomain<R>> {
+impl<R: InferenceRuntime> InferenceDomainMap<SessionSetsWithInnerVoiceIdsByDomain<R>> {
     fn get<D: InferenceDomainExt>(
         &self,
-    ) -> Option<&(StyleIdToModelInnerId, InferenceSessionSet<R, D>)> {
+    ) -> Option<&(StyleIdToInnerVoiceId, InferenceSessionSet<R, D>)> {
         D::visit(self)
     }
 }
 
-impl InferenceDomainMap<ModelBytesWithInnerIdsByDomain> {
+impl InferenceDomainMap<ModelBytesWithInnerVoiceIdsByDomain> {
     fn create_session_sets<R: InferenceRuntime>(
         &self,
         session_options: &InferenceDomainMap<SessionOptionsByDomain>,
-    ) -> anyhow::Result<InferenceDomainMap<SessionSetsWithInnerIdsByDomain<R>>> {
+    ) -> anyhow::Result<InferenceDomainMap<SessionSetsWithInnerVoiceIdsByDomain<R>>> {
         duplicate! {
             [
                 field;
                 [talk]
             ]
             let field = self
                 .field
                 .as_ref()
-                .map(|(model_inner_ids, model_bytes)| {
+                .map(|(inner_voice_ids, model_bytes)| {
                     let session_set = InferenceSessionSet::new(model_bytes, &session_options.field)?;
-                    Ok::<_, anyhow::Error>((model_inner_ids.clone(), session_set))
+                    Ok::<_, anyhow::Error>((inner_voice_ids.clone(), session_set))
                 })
                 .transpose()?;
         }
@@ -336,8 +336,8 @@ impl InferenceDomainMap<ModelBytesWithInnerIdsByDomain> {
 
 type SessionOptionsByDomain = (EnumMap<TalkOperation, InferenceSessionOptions>,);
 
-type SessionSetsWithInnerIdsByDomain<R> =
-    (Option<(StyleIdToModelInnerId, InferenceSessionSet<R, TalkDomain>)>,);
+type SessionSetsWithInnerVoiceIdsByDomain<R> =
+    (Option<(StyleIdToInnerVoiceId, InferenceSessionSet<R, TalkDomain>)>,);
 
 #[cfg(test)]
 mod tests {
diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs
index 5e4894415..06555ea28 100644
--- a/crates/voicevox_core/src/synthesizer.rs
+++ b/crates/voicevox_core/src/synthesizer.rs
@@ -836,7 +836,7 @@ pub(crate) mod blocking {
     impl<O> PerformInference for self::Synthesizer<O> {
         fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result<Vec<f32>> {
-            let (model_id, model_inner_id) = self.status.ids_for::<TalkDomain>(style_id)?;
+            let (model_id, inner_voice_id) = self.status.ids_for::<TalkDomain>(style_id)?;
 
             let PredictDurationOutput {
                 phoneme_length: output,
             } = self.status.run_session(
                 &model_id,
                 PredictDurationInput {
                     phoneme_list: ndarray::arr1(phoneme_vector),
-                    speaker_id: ndarray::arr1(&[model_inner_id.raw_id().into()]),
+                    speaker_id: ndarray::arr1(&[inner_voice_id.raw_id().into()]),
                 },
             )?;
             let mut output = output.into_raw_vec();
@@ -871,7 +871,7 @@ pub(crate) mod blocking {
             end_accent_phrase_vector: &[i64],
             style_id: StyleId,
         ) -> Result<Vec<f32>> {
-            let (model_id, model_inner_id) = self.status.ids_for::<TalkDomain>(style_id)?;
+            let (model_id, inner_voice_id) = self.status.ids_for::<TalkDomain>(style_id)?;
 
             let PredictIntonationOutput { f0_list: output } = self.status.run_session(
                 &model_id,
@@ -883,7 +883,7 @@ pub(crate) mod blocking {
                     end_accent_list: ndarray::arr1(end_accent_vector),
                     start_accent_phrase_list: ndarray::arr1(start_accent_phrase_vector),
                     end_accent_phrase_list: ndarray::arr1(end_accent_phrase_vector),
-                    speaker_id: ndarray::arr1(&[model_inner_id.raw_id().into()]),
+                    speaker_id: ndarray::arr1(&[inner_voice_id.raw_id().into()]),
                 },
             )?;
 
@@ -898,7 +898,7 @@ pub(crate) mod blocking {
             phoneme_vector: &[f32],
             style_id: StyleId,
         ) -> Result<Vec<f32>> {
-            let (model_id, model_inner_id) = self.status.ids_for::<TalkDomain>(style_id)?;
+            let (model_id, inner_voice_id) = self.status.ids_for::<TalkDomain>(style_id)?;
 
             // 音が途切れてしまうのを避けるworkaround処理が入っている
             // TODO: 改善したらここのpadding処理を取り除く
@@ -925,7 +925,7 @@ pub(crate) mod blocking {
                     phoneme: ndarray::arr1(&phoneme_with_padding)
                         .into_shape([length_with_padding, phoneme_size])
                         .unwrap(),
-                    speaker_id: ndarray::arr1(&[model_inner_id.raw_id().into()]),
+                    speaker_id: ndarray::arr1(&[inner_voice_id.raw_id().into()]),
                 },
             )?;
 
diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs
index 364c8db0a..358d0153c 100644
--- a/crates/voicevox_core/src/voice_model.rs
+++ b/crates/voicevox_core/src/voice_model.rs
@@ -16,7 +16,7 @@ use crate::{
         domains::{TalkDomain, TalkOperation},
         InferenceDomain,
     },
-    manifest::{Manifest, ManifestDomains, StyleIdToModelInnerId},
+    manifest::{Manifest, ManifestDomains, StyleIdToInnerVoiceId},
     SpeakerMeta, StyleMeta, StyleType, VoiceModelMeta,
 };
 use std::path::{Path, PathBuf};
@@ -26,8 +26,8 @@ use std::path::{Path, PathBuf};
 /// [`VoiceModelId`]: VoiceModelId
 pub type RawVoiceModelId = String;
 
-pub(crate) type ModelBytesWithInnerIdsByDomain =
-    (Option<(StyleIdToModelInnerId, EnumMap<TalkOperation, Vec<u8>>)>,);
+pub(crate) type ModelBytesWithInnerVoiceIdsByDomain =
+    (Option<(StyleIdToInnerVoiceId, EnumMap<TalkOperation, Vec<u8>>)>,);
 
 /// 音声モデルID。
 #[derive(
@@ -164,7 +164,7 @@ pub(crate) mod blocking {
         VoiceModelMeta,
     };
 
-    use super::{ModelBytesWithInnerIdsByDomain, VoiceModelHeader, VoiceModelId};
+    use super::{ModelBytesWithInnerVoiceIdsByDomain, VoiceModelHeader, VoiceModelId};
 
     /// 音声モデル。
     ///
@@ -177,7 +177,7 @@ pub(crate) mod blocking {
     impl self::VoiceModel {
         pub(crate) fn read_inference_models(
             &self,
-        ) -> LoadModelResult<InferenceDomainMap<ModelBytesWithInnerIdsByDomain>> {
+        ) -> LoadModelResult<InferenceDomainMap<ModelBytesWithInnerVoiceIdsByDomain>> {
             let reader = BlockingVvmEntryReader::open(&self.header.path)?;
 
             let talk = self
@@ -191,7 +191,7 @@ pub(crate) mod blocking {
                      predict_duration_filename,
                      predict_intonation_filename,
                      decode_filename,
-                     style_id_to_model_inner_id,
+                     style_id_to_inner_voice_id,
                  }| {
                     let model_bytes = [
                         predict_duration_filename,
@@ -206,7 +206,7 @@ pub(crate) mod blocking {
 
                     let model_bytes = EnumMap::from_array(model_bytes);
 
-                    Ok((style_id_to_model_inner_id.clone(), model_bytes))
+                    Ok((style_id_to_inner_voice_id.clone(), model_bytes))
                 },
             )
             .transpose()?;
@@ -307,7 +307,7 @@ pub(crate) mod tokio {
         Result, VoiceModelMeta,
     };
 
-    use super::{ModelBytesWithInnerIdsByDomain, VoiceModelHeader, VoiceModelId};
+    use super::{ModelBytesWithInnerVoiceIdsByDomain, VoiceModelHeader, VoiceModelId};
 
     /// 音声モデル。
     ///
@@ -320,7 +320,7 @@ pub(crate) mod tokio {
     impl self::VoiceModel {
         pub(crate) async fn read_inference_models(
             &self,
-        ) -> LoadModelResult<InferenceDomainMap<ModelBytesWithInnerIdsByDomain>> {
+        ) -> LoadModelResult<InferenceDomainMap<ModelBytesWithInnerVoiceIdsByDomain>> {
             let reader = AsyncVvmEntryReader::open(&self.header.path).await?;
 
             let talk = OptionFuture::from(self.header.manifest.domains().talk.as_ref().map(
@@ -328,7 +328,7 @@ pub(crate) mod tokio {
                      predict_duration_filename,
                      predict_intonation_filename,
                      decode_filename,
-                     style_id_to_model_inner_id,
+                     style_id_to_inner_voice_id,
                  }| async {
                     let (
                         decode_model_result,
@@ -347,7 +347,7 @@ pub(crate) mod tokio {
                         decode_model_result?,
                     ]);
 
-                    Ok((style_id_to_model_inner_id.clone(), model_bytes))
+                    Ok((style_id_to_inner_voice_id.clone(), model_bytes))
                 },
             ))
             .await
@@ -505,7 +505,7 @@ mod tests {
             predict_duration_filename: "".to_owned(),
             predict_intonation_filename: "".to_owned(),
             decode_filename: "".to_owned(),
-            style_id_to_model_inner_id: Default::default(),
+            style_id_to_inner_voice_id: Default::default(),
         });
 
     #[fixture]
diff --git a/model/sample.vvm/manifest.json b/model/sample.vvm/manifest.json
index 2c6721d08..0b82d0c3f 100644
--- a/model/sample.vvm/manifest.json
+++ b/model/sample.vvm/manifest.json
@@ -5,7 +5,7 @@
     "predict_duration_filename": "predict_duration.onnx",
     "predict_intonation_filename": "predict_intonation.onnx",
     "decode_filename": "decode.onnx",
-    "style_id_to_model_inner_id": {
+    "style_id_to_inner_voice_id": {
       "302": 2,
       "303": 3
     }