From 9c3a94e663263595e9e9df0c0b763620c0e1615c Mon Sep 17 00:00:00 2001
From: Ryo Yamashita
Date: Fri, 24 May 2024 08:00:24 +0900
Subject: [PATCH] =?UTF-8?q?change:=20`style=5Fid=5Fto=5Fmodel=5Finner=5Fid?=
 =?UTF-8?q?`=20=E2=86=92=20`style=5Fid=5Fto=5Finner=5Fvoice=5Fid`=20(#795)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
 crates/voicevox_core/src/manifest.rs    | 16 ++++----
 crates/voicevox_core/src/status.rs      | 50 ++++++++++++-------------
 crates/voicevox_core/src/synthesizer.rs | 12 +++---
 crates/voicevox_core/src/voice_model.rs | 24 ++++++------
 model/sample.vvm/manifest.json          |  2 +-
 5 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/crates/voicevox_core/src/manifest.rs b/crates/voicevox_core/src/manifest.rs
index 3b17ae3f1..a22b66e8b 100644
--- a/crates/voicevox_core/src/manifest.rs
+++ b/crates/voicevox_core/src/manifest.rs
@@ -19,18 +19,18 @@ impl Display for ManifestVersion {
 }
 
 /// モデル内IDの実体
-pub type RawModelInnerId = u32;
+pub type RawInnerVoiceId = u32;
 /// モデル内ID
 #[derive(PartialEq, Eq, Clone, Copy, Ord, PartialOrd, Deserialize, Serialize, new, Debug)]
-pub struct ModelInnerId(RawModelInnerId);
+pub struct InnerVoiceId(RawInnerVoiceId);
 
-impl ModelInnerId {
-    pub fn raw_id(self) -> RawModelInnerId {
+impl InnerVoiceId {
+    pub fn raw_id(self) -> RawInnerVoiceId {
         self.0
     }
 }
 
-impl Display for ModelInnerId {
+impl Display for InnerVoiceId {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         write!(f, "{}", self.raw_id())
     }
@@ -58,12 +58,12 @@ pub(crate) struct TalkManifest {
     pub(crate) predict_intonation_filename: String,
     pub(crate) decode_filename: String,
     #[serde(default)]
-    pub(crate) style_id_to_model_inner_id: StyleIdToModelInnerId,
+    pub(crate) style_id_to_inner_voice_id: StyleIdToInnerVoiceId,
 }
 
 #[serde_as]
 #[derive(Default, Clone, Deref, Deserialize)]
 #[deref(forward)]
-pub(crate) struct StyleIdToModelInnerId(
-    #[serde_as(as = "Arc<BTreeMap<DisplayFromStr, _>>")] Arc<BTreeMap<StyleId, ModelInnerId>>,
+pub(crate) struct StyleIdToInnerVoiceId(
+    #[serde_as(as = "Arc<BTreeMap<DisplayFromStr, _>>")] Arc<BTreeMap<StyleId, InnerVoiceId>>,
 );
diff --git a/crates/voicevox_core/src/status.rs b/crates/voicevox_core/src/status.rs
index a47de689b..6980ab7fc 100644
--- a/crates/voicevox_core/src/status.rs
+++ b/crates/voicevox_core/src/status.rs
@@ -14,9 +14,9 @@ use crate::{
         InferenceDomain, InferenceInputSignature, InferenceRuntime, InferenceSessionOptions,
         InferenceSignature,
     },
-    manifest::{ModelInnerId, StyleIdToModelInnerId},
+    manifest::{InnerVoiceId, StyleIdToInnerVoiceId},
     metas::{self, SpeakerMeta, StyleId, StyleMeta, VoiceModelMeta},
-    voice_model::{ModelBytesWithInnerIdsByDomain, VoiceModelHeader, VoiceModelId},
+    voice_model::{ModelBytesWithInnerVoiceIdsByDomain, VoiceModelHeader, VoiceModelId},
     Result,
 };
 
@@ -36,7 +36,7 @@ impl<R: InferenceRuntime> Status<R> {
     pub(crate) fn insert_model(
         &self,
         model_header: &VoiceModelHeader,
-        model_contents: &InferenceDomainMap<ModelBytesWithInnerIdsByDomain>,
+        model_contents: &InferenceDomainMap<ModelBytesWithInnerVoiceIdsByDomain>,
     ) -> Result<()> {
         self.loaded_models
             .lock()
@@ -66,14 +66,14 @@ impl<R: InferenceRuntime> Status<R> {
         self.loaded_models.lock().unwrap().metas()
     }
 
-    /// あるスタイルに対応する`VoiceModelId`と`ModelInnerId`の組を返す。
+    /// あるスタイルに対応する`VoiceModelId`と`InnerVoiceId`の組を返す。
     ///
-    /// `StyleId` → `ModelInnerId`のマッピングが存在しない場合は、`ModelInnerId`としては
+    /// `StyleId` → `InnerVoiceId`のマッピングが存在しない場合は、`InnerVoiceId`としては
     /// `style_id`と同じ値を返す。
     pub(crate) fn ids_for<D: InferenceDomainExt>(
         &self,
         style_id: StyleId,
-    ) -> Result<(VoiceModelId, ModelInnerId)> {
+    ) -> Result<(VoiceModelId, InnerVoiceId)> {
         self.loaded_models.lock().unwrap().ids_for::<D>(style_id)
     }
 
@@ -122,7 +122,7 @@ struct LoadedModels<R: InferenceRuntime>(IndexMap<VoiceModelId, LoadedModel<R>>);
 struct LoadedModel<R: InferenceRuntime> {
     metas: VoiceModelMeta,
-    session_sets_with_inner_ids: InferenceDomainMap<SessionSetsWithInnerIdsByDomain<R>>,
+    session_sets_with_inner_ids: InferenceDomainMap<SessionSetsWithInnerVoiceIdsByDomain<R>>,
 }
 
 impl<R: InferenceRuntime> LoadedModels<R> {
@@ -133,7 +133,7 @@ impl<R: InferenceRuntime> LoadedModels<R> {
     fn ids_for<D: InferenceDomainExt>(
         &self,
         style_id: StyleId,
-    ) -> Result<(VoiceModelId, ModelInnerId)> {
+    ) -> Result<(VoiceModelId, InnerVoiceId)> {
         let (
             model_id,
             LoadedModel {
@@ -153,13 +153,13 @@ impl<R: InferenceRuntime> LoadedModels<R> {
                 style_types: D::style_types(),
             })?;
 
-        let model_inner_id = session_sets_with_inner_ids
+        let inner_voice_id = session_sets_with_inner_ids
             .get::<D>()
             .as_ref()
-            .and_then(|(model_inner_ids, _)| model_inner_ids.get(&style_id).copied())
-            .unwrap_or_else(|| ModelInnerId::new(style_id.raw_id()));
+            .and_then(|(inner_voice_ids, _)| inner_voice_ids.get(&style_id).copied())
+            .unwrap_or_else(|| InnerVoiceId::new(style_id.raw_id()));
 
-        Ok((model_id.clone(), model_inner_id))
+        Ok((model_id.clone(), inner_voice_id))
     }
 
     /// # Panics
@@ -250,7 +250,7 @@ impl<R: InferenceRuntime> LoadedModels<R> {
     fn insert(
         &mut self,
         model_header: &VoiceModelHeader,
-        session_sets_with_inner_ids: InferenceDomainMap<SessionSetsWithInnerIdsByDomain<R>>,
+        session_sets_with_inner_ids: InferenceDomainMap<SessionSetsWithInnerVoiceIdsByDomain<R>>,
     ) -> Result<()> {
         self.ensure_acceptable(model_header)?;
 
@@ -286,8 +286,8 @@ impl<R: InferenceRuntime> LoadedModels<R> {
 
 pub(crate) trait InferenceDomainExt: InferenceDomain {
     fn visit<R: InferenceRuntime>(
-        map: &InferenceDomainMap<SessionSetsWithInnerIdsByDomain<R>>,
-    ) -> Option<&(StyleIdToModelInnerId, InferenceSessionSet<R, Self>)>;
+        map: &InferenceDomainMap<SessionSetsWithInnerVoiceIdsByDomain<R>>,
+    ) -> Option<&(StyleIdToInnerVoiceId, InferenceSessionSet<R, Self>)>;
 }
 
 #[duplicate_item(
@@ -296,25 +296,25 @@ pub(crate) trait InferenceDomainExt: InferenceDomain {
 )]
 impl InferenceDomainExt for T {
     fn visit<R: InferenceRuntime>(
-        map: &InferenceDomainMap<SessionSetsWithInnerIdsByDomain<R>>,
-    ) -> Option<&(StyleIdToModelInnerId, InferenceSessionSet<R, Self>)> {
+        map: &InferenceDomainMap<SessionSetsWithInnerVoiceIdsByDomain<R>>,
+    ) -> Option<&(StyleIdToInnerVoiceId, InferenceSessionSet<R, Self>)> {
         map.field.as_ref()
     }
 }
 
-impl<R: InferenceRuntime> InferenceDomainMap<SessionSetsWithInnerIdsByDomain<R>> {
+impl<R: InferenceRuntime> InferenceDomainMap<SessionSetsWithInnerVoiceIdsByDomain<R>> {
     fn get<D: InferenceDomainExt>(
         &self,
-    ) -> Option<&(StyleIdToModelInnerId, InferenceSessionSet<R, D>)> {
+    ) -> Option<&(StyleIdToInnerVoiceId, InferenceSessionSet<R, D>)> {
         D::visit(self)
     }
 }
 
-impl InferenceDomainMap<ModelBytesWithInnerIdsByDomain> {
+impl InferenceDomainMap<ModelBytesWithInnerVoiceIdsByDomain> {
     fn create_session_sets<R: InferenceRuntime>(
         &self,
         session_options: &InferenceDomainMap<SessionOptionsByDomain>,
-    ) -> anyhow::Result<InferenceDomainMap<SessionSetsWithInnerIdsByDomain<R>>> {
+    ) -> anyhow::Result<InferenceDomainMap<SessionSetsWithInnerVoiceIdsByDomain<R>>> {
         duplicate! {
             [
                 field;
                 [talk]
             ]
             let field = self
                 .field
                 .as_ref()
-                .map(|(model_inner_ids, model_bytes)| {
+                .map(|(inner_voice_ids, model_bytes)| {
                     let session_set = InferenceSessionSet::new(model_bytes, &session_options.field)?;
-                    Ok::<_, anyhow::Error>((model_inner_ids.clone(), session_set))
+                    Ok::<_, anyhow::Error>((inner_voice_ids.clone(), session_set))
                 })
                 .transpose()?;
         }
@@ -336,8 +336,8 @@ impl InferenceDomainMap<ModelBytesWithInnerIdsByDomain> {
 
 type SessionOptionsByDomain = (EnumMap<TalkOperation, InferenceSessionOptions>,);
 
-type SessionSetsWithInnerIdsByDomain<R> =
-    (Option<(StyleIdToModelInnerId, InferenceSessionSet<R, TalkDomain>)>,);
+type SessionSetsWithInnerVoiceIdsByDomain<R> =
+    (Option<(StyleIdToInnerVoiceId, InferenceSessionSet<R, TalkDomain>)>,);
 
 #[cfg(test)]
 mod tests {
diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs
index 5e4894415..06555ea28 100644
--- a/crates/voicevox_core/src/synthesizer.rs
+++ b/crates/voicevox_core/src/synthesizer.rs
@@ -836,7 +836,7 @@ pub(crate) mod blocking {
     impl<O> PerformInference for self::Synthesizer<O> {
         fn predict_duration(&self, phoneme_vector: &[i64], style_id: StyleId) -> Result<Vec<f32>> {
-            let (model_id, model_inner_id) = self.status.ids_for::<TalkDomain>(style_id)?;
+            let (model_id, inner_voice_id) = self.status.ids_for::<TalkDomain>(style_id)?;
 
             let PredictDurationOutput {
                 phoneme_length: output,
             } = self.status.run_session(
                 &model_id,
                 PredictDurationInput {
                     phoneme_list: ndarray::arr1(phoneme_vector),
-                    speaker_id: ndarray::arr1(&[model_inner_id.raw_id().into()]),
+                    speaker_id: ndarray::arr1(&[inner_voice_id.raw_id().into()]),
                 },
             )?;
             let mut output = output.into_raw_vec();
@@ -871,7 +871,7 @@ pub(crate) mod blocking {
             end_accent_phrase_vector: &[i64],
             style_id: StyleId,
         ) -> Result<Vec<f32>> {
-            let (model_id, model_inner_id) = self.status.ids_for::<TalkDomain>(style_id)?;
+            let (model_id, inner_voice_id) = self.status.ids_for::<TalkDomain>(style_id)?;
 
             let PredictIntonationOutput { f0_list: output } = self.status.run_session(
                 &model_id,
@@ -883,7 +883,7 @@ pub(crate) mod blocking {
                     end_accent_list: ndarray::arr1(end_accent_vector),
                     start_accent_phrase_list: ndarray::arr1(start_accent_phrase_vector),
                     end_accent_phrase_list: ndarray::arr1(end_accent_phrase_vector),
-                    speaker_id: ndarray::arr1(&[model_inner_id.raw_id().into()]),
+                    speaker_id: ndarray::arr1(&[inner_voice_id.raw_id().into()]),
                 },
             )?;
 
@@ -898,7 +898,7 @@ pub(crate) mod blocking {
             phoneme_vector: &[f32],
             style_id: StyleId,
         ) -> Result<Vec<f32>> {
-            let (model_id, model_inner_id) = self.status.ids_for::<TalkDomain>(style_id)?;
+            let (model_id, inner_voice_id) = self.status.ids_for::<TalkDomain>(style_id)?;
 
             // 音が途切れてしまうのを避けるworkaround処理が入っている
             // TODO: 改善したらここのpadding処理を取り除く
@@ -925,7 +925,7 @@ pub(crate) mod blocking {
                     phoneme: ndarray::arr1(&phoneme_with_padding)
                         .into_shape([length_with_padding, phoneme_size])
                         .unwrap(),
-                    speaker_id: ndarray::arr1(&[model_inner_id.raw_id().into()]),
+                    speaker_id: ndarray::arr1(&[inner_voice_id.raw_id().into()]),
                 },
             )?;
 
diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs
index 364c8db0a..358d0153c 100644
--- a/crates/voicevox_core/src/voice_model.rs
+++ b/crates/voicevox_core/src/voice_model.rs
@@ -16,7 +16,7 @@ use crate::{
         domains::{TalkDomain, TalkOperation},
         InferenceDomain,
     },
-    manifest::{Manifest, ManifestDomains, StyleIdToModelInnerId},
+    manifest::{Manifest, ManifestDomains, StyleIdToInnerVoiceId},
     SpeakerMeta, StyleMeta, StyleType, VoiceModelMeta,
 };
 use std::path::{Path, PathBuf};
@@ -26,8 +26,8 @@ use std::path::{Path, PathBuf};
 /// [`VoiceModelId`]: VoiceModelId
 pub type RawVoiceModelId = String;
 
-pub(crate) type ModelBytesWithInnerIdsByDomain =
-    (Option<(StyleIdToModelInnerId, EnumMap<TalkOperation, Vec<u8>>)>,);
+pub(crate) type ModelBytesWithInnerVoiceIdsByDomain =
+    (Option<(StyleIdToInnerVoiceId, EnumMap<TalkOperation, Vec<u8>>)>,);
 
 /// 音声モデルID。
 #[derive(
@@ -164,7 +164,7 @@ pub(crate) mod blocking {
         VoiceModelMeta,
     };
 
-    use super::{ModelBytesWithInnerIdsByDomain, VoiceModelHeader, VoiceModelId};
+    use super::{ModelBytesWithInnerVoiceIdsByDomain, VoiceModelHeader, VoiceModelId};
 
     /// 音声モデル。
     ///
@@ -177,7 +177,7 @@ pub(crate) mod blocking {
     impl self::VoiceModel {
         pub(crate) fn read_inference_models(
             &self,
-        ) -> LoadModelResult<InferenceDomainMap<ModelBytesWithInnerIdsByDomain>> {
+        ) -> LoadModelResult<InferenceDomainMap<ModelBytesWithInnerVoiceIdsByDomain>> {
             let reader = BlockingVvmEntryReader::open(&self.header.path)?;
 
             let talk = self
@@ -191,7 +191,7 @@ pub(crate) mod blocking {
                      predict_duration_filename,
                      predict_intonation_filename,
                      decode_filename,
-                     style_id_to_model_inner_id,
+                     style_id_to_inner_voice_id,
                  }| {
                     let model_bytes = [
                         predict_duration_filename,
@@ -206,7 +206,7 @@ pub(crate) mod blocking {
 
                     let model_bytes = EnumMap::from_array(model_bytes);
 
-                    Ok((style_id_to_model_inner_id.clone(), model_bytes))
+                    Ok((style_id_to_inner_voice_id.clone(), model_bytes))
                 },
             )
             .transpose()?;
@@ -307,7 +307,7 @@ pub(crate) mod tokio {
         Result, VoiceModelMeta,
     };
 
-    use super::{ModelBytesWithInnerIdsByDomain, VoiceModelHeader, VoiceModelId};
+    use super::{ModelBytesWithInnerVoiceIdsByDomain, VoiceModelHeader, VoiceModelId};
 
     /// 音声モデル。
     ///
@@ -320,7 +320,7 @@ pub(crate) mod tokio {
     impl self::VoiceModel {
         pub(crate) async fn read_inference_models(
             &self,
-        ) -> LoadModelResult<InferenceDomainMap<ModelBytesWithInnerIdsByDomain>> {
+        ) -> LoadModelResult<InferenceDomainMap<ModelBytesWithInnerVoiceIdsByDomain>> {
             let reader = AsyncVvmEntryReader::open(&self.header.path).await?;
 
             let talk = OptionFuture::from(self.header.manifest.domains().talk.as_ref().map(
@@ -328,7 +328,7 @@ pub(crate) mod tokio {
                      predict_duration_filename,
                      predict_intonation_filename,
                      decode_filename,
-                     style_id_to_model_inner_id,
+                     style_id_to_inner_voice_id,
                  }| async {
                     let (
                         decode_model_result,
@@ -347,7 +347,7 @@ pub(crate) mod tokio {
                         decode_model_result?,
                     ]);
 
-                    Ok((style_id_to_model_inner_id.clone(), model_bytes))
+                    Ok((style_id_to_inner_voice_id.clone(), model_bytes))
                 },
             ))
             .await
@@ -505,7 +505,7 @@ mod tests {
             predict_duration_filename: "".to_owned(),
             predict_intonation_filename: "".to_owned(),
             decode_filename: "".to_owned(),
-            style_id_to_model_inner_id: Default::default(),
+            style_id_to_inner_voice_id: Default::default(),
         });
 
     #[fixture]
diff --git a/model/sample.vvm/manifest.json b/model/sample.vvm/manifest.json
index 2c6721d08..0b82d0c3f 100644
--- a/model/sample.vvm/manifest.json
+++ b/model/sample.vvm/manifest.json
@@ -5,7 +5,7 @@
     "predict_duration_filename": "predict_duration.onnx",
     "predict_intonation_filename": "predict_intonation.onnx",
     "decode_filename": "decode.onnx",
-    "style_id_to_model_inner_id": {
+    "style_id_to_inner_voice_id": {
       "302": 2,
       "303": 3
     }