From 7d0aa000509bba4df254fbc1d0a16286d9410f0d Mon Sep 17 00:00:00 2001 From: Ryo Yamashita <qryxip@gmail.com> Date: Mon, 22 Jan 2024 02:59:31 +0900 Subject: [PATCH 1/7] =?UTF-8?q?`metas`=E5=87=BA=E5=8A=9B=E6=99=82=E3=81=AB?= =?UTF-8?q?=E8=A9=B1=E8=80=85=E6=83=85=E5=A0=B1=E3=82=92=E3=83=9E=E3=83=BC?= =?UTF-8?q?=E3=82=B8=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../voicevox_core/src/__internal/interop.rs | 2 +- crates/voicevox_core/src/infer/status.rs | 44 ++++++++++++------- crates/voicevox_core/src/metas.rs | 41 +++++++++++++++++ .../src/compatible_engine.rs | 4 +- 4 files changed, 72 insertions(+), 19 deletions(-) diff --git a/crates/voicevox_core/src/__internal/interop.rs b/crates/voicevox_core/src/__internal/interop.rs index c8cd7101f..fe46d10bc 100644 --- a/crates/voicevox_core/src/__internal/interop.rs +++ b/crates/voicevox_core/src/__internal/interop.rs @@ -1 +1 @@ -pub use crate::synthesizer::blocking::PerformInference; +pub use crate::{metas::merge as merge_metas, synthesizer::blocking::PerformInference}; diff --git a/crates/voicevox_core/src/infer/status.rs b/crates/voicevox_core/src/infer/status.rs index 12367dda2..a9a1836e0 100644 --- a/crates/voicevox_core/src/infer/status.rs +++ b/crates/voicevox_core/src/infer/status.rs @@ -14,7 +14,7 @@ use crate::{ error::{ErrorRepr, LoadModelError, LoadModelErrorKind, LoadModelResult}, infer::{InferenceOperation, ParamInfo}, manifest::ModelInnerId, - metas::{SpeakerMeta, StyleId, StyleMeta, VoiceModelMeta}, + metas::{self, SpeakerMeta, StyleId, StyleMeta, VoiceModelMeta}, voice_model::{VoiceModelHeader, VoiceModelId}, Result, }; @@ -130,11 +130,7 @@ struct LoadedModel<R: InferenceRuntime, D: InferenceDomain> { impl<R: InferenceRuntime, D: InferenceDomain> LoadedModels<R, D> { fn metas(&self) -> VoiceModelMeta { - self.0 - .values() - .flat_map(|LoadedModel { metas, .. 
}| metas) - .cloned() - .collect() + metas::merge(self.0.values().flat_map(|LoadedModel { metas, .. }| metas)) } fn ids_for(&self, style_id: StyleId) -> Result<(VoiceModelId, ModelInnerId)> { @@ -184,20 +180,33 @@ impl<R: InferenceRuntime, D: InferenceDomain> LoadedModels<R, D> { /// /// # Errors /// - /// 音声モデルIDかスタイルIDが`model_header`と重複するとき、エラーを返す。 + /// 次の場合にエラーを返す。 + /// + /// - 音声モデルIDかスタイルIDが`model_header`と重複するとき + /// - 同じ`speaker_uuid`で、スタイル以外でプロパティが異なっている話者があるとき fn ensure_acceptable(&self, model_header: &VoiceModelHeader) -> LoadModelResult<()> { - let loaded = self.styles(); - let external = model_header - .metas - .iter() - .flat_map(|speaker| speaker.styles()); - let error = |context| LoadModelError { path: model_header.path.clone(), context, source: None, }; + let loaded = self.speakers(); + let external = model_header.metas.iter(); + if let Some((_loaded, _external)) = + iproduct!(loaded, external).find(|(loaded, external)| { + loaded.speaker_uuid() == external.speaker_uuid() + && !loaded.eq_except_styles(external) + }) + { + todo!("same `speaker_uuid` but different properties"); + } + + let loaded = self.styles(); + let external = model_header + .metas + .iter() + .flat_map(|speaker| speaker.styles()); if self.0.contains_key(&model_header.id) { return Err(error(LoadModelErrorKind::ModelAlreadyLoaded { id: model_header.id.clone(), @@ -242,11 +251,12 @@ impl<R: InferenceRuntime, D: InferenceDomain> LoadedModels<R, D> { Ok(()) } + fn speakers(&self) -> impl Iterator<Item = &SpeakerMeta> { + self.0.values().flat_map(|LoadedModel { metas, .. }| metas) + } + fn styles(&self) -> impl Iterator<Item = &StyleMeta> { - self.0 - .values() - .flat_map(|LoadedModel { metas, .. 
}| metas) - .flat_map(|speaker| speaker.styles()) + self.speakers().flat_map(|speaker| speaker.styles()) } } diff --git a/crates/voicevox_core/src/metas.rs b/crates/voicevox_core/src/metas.rs index 77cb3a9fc..6e5ab29ea 100644 --- a/crates/voicevox_core/src/metas.rs +++ b/crates/voicevox_core/src/metas.rs @@ -2,8 +2,29 @@ use std::fmt::Display; use derive_getters::Getters; use derive_new::new; +use itertools::Itertools as _; use serde::{Deserialize, Serialize}; +pub fn merge<'a>(metas: impl IntoIterator<Item = &'a SpeakerMeta>) -> Vec<SpeakerMeta> { + metas + .into_iter() + .into_grouping_map_by(|speaker| &speaker.speaker_uuid) + .aggregate::<_, SpeakerMeta>(|acc, _, speaker| { + Some( + acc.map(|mut acc| { + acc.styles.extend(speaker.styles.clone()); + acc + }) + .unwrap_or_else(|| speaker.clone()), + ) + }) + .into_values() + .sorted_by_key(|SpeakerMeta { styles, .. }| { + styles.iter().map(|&StyleMeta { id, .. }| id).min() + }) + .collect() +} + /// [`StyleId`]の実体。 /// /// [`StyleId`]: StyleId @@ -67,6 +88,26 @@ pub struct SpeakerMeta { speaker_uuid: String, } +impl SpeakerMeta { + pub(crate) fn eq_except_styles(&self, other: &Self) -> bool { + let Self { + name: name1, + styles: _, + version: version1, + speaker_uuid: speaker_uuid1, + } = self; + + let Self { + name: name2, + styles: _, + version: version2, + speaker_uuid: speaker_uuid2, + } = other; + + (name1, version1, speaker_uuid1) == (name2, version2, speaker_uuid2) + } +} + /// **スタイル**(_style_)のメタ情報。 #[derive(Deserialize, Serialize, Getters, Clone)] pub struct StyleMeta { diff --git a/crates/voicevox_core_c_api/src/compatible_engine.rs b/crates/voicevox_core_c_api/src/compatible_engine.rs index 5a6f20f76..6755910f5 100644 --- a/crates/voicevox_core_c_api/src/compatible_engine.rs +++ b/crates/voicevox_core_c_api/src/compatible_engine.rs @@ -41,7 +41,9 @@ static VOICE_MODEL_SET: Lazy<VoiceModelSet> = Lazy::new(|| { .iter() .map(|vvm| (vvm.id().clone(), vvm.clone())) .collect(); - let metas: Vec<_> = 
all_vvms.iter().flat_map(|vvm| vvm.metas()).collect(); + let metas = voicevox_core::__internal::interop::merge_metas( + all_vvms.iter().flat_map(|vvm| vvm.metas()), + ); let mut style_model_map = BTreeMap::default(); for vvm in all_vvms.iter() { for meta in vvm.metas().iter() { From 2b417633abc16d8740f4dddab9b37f6703aa5236 Mon Sep 17 00:00:00 2001 From: Ryo Yamashita <qryxip@gmail.com> Date: Mon, 22 Jan 2024 03:10:58 +0900 Subject: [PATCH 2/7] =?UTF-8?q?=E3=82=B9=E3=82=BF=E3=82=A4=E3=83=AB?= =?UTF-8?q?=E3=82=82=E3=82=BD=E3=83=BC=E3=83=88=E3=81=99=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/metas.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/crates/voicevox_core/src/metas.rs b/crates/voicevox_core/src/metas.rs index 6e5ab29ea..7dd943de7 100644 --- a/crates/voicevox_core/src/metas.rs +++ b/crates/voicevox_core/src/metas.rs @@ -19,8 +19,14 @@ pub fn merge<'a>(metas: impl IntoIterator<Item = &'a SpeakerMeta>) -> Vec<Speake ) }) .into_values() - .sorted_by_key(|SpeakerMeta { styles, .. }| { - styles.iter().map(|&StyleMeta { id, .. }| id).min() + .map(|mut speaker| { + speaker + .styles + .sort_unstable_by_key(|&StyleMeta { id, .. }| id); + speaker + }) + .sorted_unstable_by_key(|SpeakerMeta { styles, .. }| { + styles.first().map(|&StyleMeta { id, .. 
}| id) }) .collect() } From 178d5ac29c8007100f7bfdb54f67c0b46a70bcc2 Mon Sep 17 00:00:00 2001 From: Ryo Yamashita <qryxip@gmail.com> Date: Sat, 27 Jan 2024 16:30:52 +0900 Subject: [PATCH 3/7] =?UTF-8?q?`SpeakerMeta::{speaker=5Forder,style=5Forde?= =?UTF-8?q?r}`=E3=82=92=E5=B0=8E=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/infer/status.rs | 3 +- crates/voicevox_core/src/metas.rs | 61 ++++++++++++------- crates/voicevox_core/src/voice_model.rs | 12 +++- .../tests/e2e/snapshots.toml | 24 ++++++-- .../jp/hiroshiba/voicevoxcore/VoiceModel.java | 24 ++++++++ .../python/voicevox_core/_models.py | 16 ++++- 6 files changed, 110 insertions(+), 30 deletions(-) diff --git a/crates/voicevox_core/src/infer/status.rs b/crates/voicevox_core/src/infer/status.rs index a9a1836e0..bef8889d3 100644 --- a/crates/voicevox_core/src/infer/status.rs +++ b/crates/voicevox_core/src/infer/status.rs @@ -8,6 +8,7 @@ use std::{ use anyhow::bail; use educe::Educe; use enum_map::{Enum as _, EnumMap}; +use indexmap::IndexMap; use itertools::{iproduct, Itertools as _}; use crate::{ @@ -119,7 +120,7 @@ impl<R: InferenceRuntime, D: InferenceDomain> Status<R, D> { #[derive(Educe)] #[educe(Default(bound = "R: InferenceRuntime, D: InferenceDomain"))] struct LoadedModels<R: InferenceRuntime, D: InferenceDomain>( - BTreeMap<VoiceModelId, LoadedModel<R, D>>, + IndexMap<VoiceModelId, LoadedModel<R, D>>, ); struct LoadedModel<R: InferenceRuntime, D: InferenceDomain> { diff --git a/crates/voicevox_core/src/metas.rs b/crates/voicevox_core/src/metas.rs index 7dd943de7..47682c2ad 100644 --- a/crates/voicevox_core/src/metas.rs +++ b/crates/voicevox_core/src/metas.rs @@ -2,33 +2,36 @@ use std::fmt::Display; use derive_getters::Getters; use derive_new::new; +use indexmap::{IndexMap, IndexSet}; use itertools::Itertools as _; use serde::{Deserialize, Serialize}; pub fn merge<'a>(metas: impl IntoIterator<Item = &'a SpeakerMeta>) -> 
Vec<SpeakerMeta> { - metas + return metas .into_iter() - .into_grouping_map_by(|speaker| &speaker.speaker_uuid) - .aggregate::<_, SpeakerMeta>(|acc, _, speaker| { - Some( - acc.map(|mut acc| { - acc.styles.extend(speaker.styles.clone()); - acc - }) - .unwrap_or_else(|| speaker.clone()), - ) + .fold(IndexMap::<_, SpeakerMeta>::new(), |mut acc, speaker| { + acc.entry(&speaker.speaker_uuid) + .and_modify(|acc| acc.styles.extend(speaker.styles.clone())) + .or_insert_with(|| speaker.clone()); + acc }) .into_values() - .map(|mut speaker| { - speaker - .styles - .sort_unstable_by_key(|&StyleMeta { id, .. }| id); - speaker + .update(|speaker| { + speaker.styles.sort_by_key(|StyleMeta { id, .. }| { + key(speaker + .style_order + .get_index_of(id) + .map(|i| i.try_into().unwrap())) + }); }) - .sorted_unstable_by_key(|SpeakerMeta { styles, .. }| { - styles.first().map(|&StyleMeta { id, .. }| id) - }) - .collect() + .sorted_by_key(|&SpeakerMeta { speaker_order, .. }| key(speaker_order)) + .collect(); + + fn key(order: Option<u32>) -> impl Ord { + order + .map(Into::into) + .unwrap_or_else(|| u64::from(u32::MAX) + 1) + } } /// [`StyleId`]の実体。 @@ -42,7 +45,7 @@ pub type RawStyleId = u32; /// /// [**話者**(_speaker_)]: SpeakerMeta /// [**スタイル**(_style_)]: StyleMeta -#[derive(PartialEq, Eq, Clone, Copy, Ord, PartialOrd, Deserialize, Serialize, new, Debug)] +#[derive(PartialEq, Eq, Clone, Copy, Ord, Hash, PartialOrd, Deserialize, Serialize, new, Debug)] pub struct StyleId(RawStyleId); impl StyleId { @@ -92,6 +95,17 @@ pub struct SpeakerMeta { version: StyleVersion, /// 話者のUUID。 speaker_uuid: String, + /// 話者の順番。 + /// + /// `SpeakerMeta`の列は、この値に対して昇順に並んでいるべきである。 + speaker_order: Option<u32>, + /// 話者に属するスタイルの順番。 + /// + /// [`styles`]はこの並びに沿うべきである。 + /// + /// [`styles`]: Self::styles + #[serde(default)] + style_order: IndexSet<StyleId>, } impl SpeakerMeta { @@ -101,6 +115,8 @@ impl SpeakerMeta { styles: _, version: version1, speaker_uuid: speaker_uuid1, + speaker_order: 
speaker_order1, + style_order: style_order1, } = self; let Self { @@ -108,9 +124,12 @@ impl SpeakerMeta { styles: _, version: version2, speaker_uuid: speaker_uuid2, + speaker_order: speaker_order2, + style_order: style_order2, } = other; - (name1, version1, speaker_uuid1) == (name2, version2, speaker_uuid2) + (name1, version1, speaker_uuid1, speaker_order1, style_order1) + == (name2, version2, speaker_uuid2, speaker_order2, style_order2) } } diff --git a/crates/voicevox_core/src/voice_model.rs b/crates/voicevox_core/src/voice_model.rs index 96bf481d1..fc8f4d20f 100644 --- a/crates/voicevox_core/src/voice_model.rs +++ b/crates/voicevox_core/src/voice_model.rs @@ -15,7 +15,17 @@ pub type RawVoiceModelId = String; /// 音声モデルID。 #[derive( - PartialEq, Eq, Clone, Ord, PartialOrd, Deserialize, new, Getters, derive_more::Display, Debug, + PartialEq, + Eq, + Clone, + Ord, + Hash, + PartialOrd, + Deserialize, + new, + Getters, + derive_more::Display, + Debug, )] pub struct VoiceModelId { raw_voice_model_id: RawVoiceModelId, diff --git a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml index 8f3fa4f3b..d0e80f97f 100644 --- a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml +++ b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml @@ -10,7 +10,9 @@ metas = ''' } ], "version": "0.0.1", - "speaker_uuid": "574bc678-8370-44be-b941-08e46e7b47d7" + "speaker_uuid": "574bc678-8370-44be-b941-08e46e7b47d7", + "speaker_order": null, + "style_order": [] }, { "name": "dummy2", @@ -21,7 +23,9 @@ metas = ''' } ], "version": "0.0.1", - "speaker_uuid": "dd9ccd75-75f6-40ce-a3db-960cbed2e905" + "speaker_uuid": "dd9ccd75-75f6-40ce-a3db-960cbed2e905", + "speaker_order": null, + "style_order": [] }, { "name": "dummy3", @@ -36,7 +40,9 @@ metas = ''' } ], "version": "0.0.1", - "speaker_uuid": "5d3d9aa9-88e5-4a96-8ef7-f13a3cad1cb3" + "speaker_uuid": "5d3d9aa9-88e5-4a96-8ef7-f13a3cad1cb3", + "speaker_order": null, + "style_order": [] } ]''' 
stderr.windows = ''' @@ -93,7 +99,9 @@ metas = ''' } ], "version": "0.0.1", - "speaker_uuid": "574bc678-8370-44be-b941-08e46e7b47d7" + "speaker_uuid": "574bc678-8370-44be-b941-08e46e7b47d7", + "speaker_order": null, + "style_order": [] }, { "name": "dummy2", @@ -104,7 +112,9 @@ metas = ''' } ], "version": "0.0.1", - "speaker_uuid": "dd9ccd75-75f6-40ce-a3db-960cbed2e905" + "speaker_uuid": "dd9ccd75-75f6-40ce-a3db-960cbed2e905", + "speaker_order": null, + "style_order": [] }, { "name": "dummy3", @@ -119,7 +129,9 @@ metas = ''' } ], "version": "0.0.1", - "speaker_uuid": "5d3d9aa9-88e5-4a96-8ef7-f13a3cad1cb3" + "speaker_uuid": "5d3d9aa9-88e5-4a96-8ef7-f13a3cad1cb3", + "speaker_order": null, + "style_order": [] } ]''' stderr.windows = ''' diff --git a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java index 05c1a11b2..0e5715e09 100644 --- a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java +++ b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java @@ -4,6 +4,8 @@ import com.google.gson.annotations.Expose; import com.google.gson.annotations.SerializedName; import jakarta.annotation.Nonnull; +import jakarta.annotation.Nullable; +import java.util.LinkedHashSet; /** 音声モデル。 */ public class VoiceModel extends Dll { @@ -68,6 +70,26 @@ public static class SpeakerMeta { @Nonnull public final String version; + /** + * 話者の順番。 + * + * <p>{@code SpeakerMeta}の列は、この値に対して昇順に並んでいるべきである。 + */ + @SerializedName("speaker_order") + @Expose + @Nullable + public final Integer speakerOrder; + + /** + * 話者に属するスタイルの順番。 + * + * <p>{@link #styles}はこの並びに沿うべきである。 + */ + @SerializedName("style_order") + @Expose + @Nonnull + public final LinkedHashSet<Integer> styleOrder; + private SpeakerMeta() { // GSONからコンストラクトするため、このメソッドは呼ばれることは無い。 // このメソッドは@Nonnullを満たすために必要。 @@ -75,6 +97,8 @@ private 
SpeakerMeta() { this.styles = new StyleMeta[0]; this.speakerUuid = ""; this.version = ""; + this.speakerOrder = null; + this.styleOrder = new LinkedHashSet<>(); } } diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_models.py b/crates/voicevox_core_python_api/python/voicevox_core/_models.py index 195154629..9e11e8bd8 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/_models.py +++ b/crates/voicevox_core_python_api/python/voicevox_core/_models.py @@ -1,6 +1,6 @@ import dataclasses from enum import Enum -from typing import List, NewType, Optional +from typing import Dict, List, NewType, Optional import pydantic @@ -61,6 +61,20 @@ class SpeakerMeta: version: StyleVersion """話者のUUID。""" + speaker_order: Optional[int] = None + """ + 話者の順番。 + + ``SpeakerMeta`` の列は、この値に対して昇順に並んでいるべきである。 + """ + + style_order: List[StyleId] = dataclasses.field(default_factory=list) + """ + 話者に属するスタイルの順番。 + + :attr:`styles` はこの並びに沿うべきである。 + """ + @pydantic.dataclasses.dataclass class SupportedDevices: From e78dd4f5228664b1ccaf67d7ceb264d3ff0164d3 Mon Sep 17 00:00:00 2001 From: Ryo Yamashita <qryxip@gmail.com> Date: Sun, 28 Jan 2024 00:31:52 +0900 Subject: [PATCH 4/7] =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=82=92?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/metas.rs | 118 ++++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) diff --git a/crates/voicevox_core/src/metas.rs b/crates/voicevox_core/src/metas.rs index 47682c2ad..22426f5fe 100644 --- a/crates/voicevox_core/src/metas.rs +++ b/crates/voicevox_core/src/metas.rs @@ -141,3 +141,121 @@ pub struct StyleMeta { /// スタイル名。 name: String, } + +#[cfg(test)] +mod tests { + use once_cell::sync::Lazy; + use serde_json::json; + + #[test] + fn merge_works() -> anyhow::Result<()> { + static INPUT: Lazy<serde_json::Value> = Lazy::new(|| { + json!([ + { + "name": "B", + "styles": [ + { + "id": 3, + 
"name": "B_1" + } + ], + "version": "0.0.0", + "speaker_uuid": "f34ab151-c0f5-4e0a-9ad2-51ce30dba24d", + "speaker_order": 1, + "style_order": [ + 3 + ] + }, + { + "name": "A", + "styles": [ + { + "id": 2, + "name": "A_3" + } + ], + "version": "0.0.0", + "speaker_uuid": "d6fd707c-a451-48e9-8f00-fe9ee3bf6264", + "speaker_order": 0, + "style_order": [ + 1, + 0, + 2 + ] + }, + { + "name": "A", + "styles": [ + { + "id": 1, + "name": "A_1" + }, + { + "id": 0, + "name": "A_2" + } + ], + "version": "0.0.0", + "speaker_uuid": "d6fd707c-a451-48e9-8f00-fe9ee3bf6264", + "speaker_order": 0, + "style_order": [ + 1, + 0, + 2 + ] + } + ]) + }); + + static EXPECTED: Lazy<serde_json::Value> = Lazy::new(|| { + json!([ + { + "name": "A", + "styles": [ + { + "id": 1, + "name": "A_1" + }, + { + "id": 0, + "name": "A_2" + }, + { + "id": 2, + "name": "A_3" + } + ], + "version": "0.0.0", + "speaker_uuid": "d6fd707c-a451-48e9-8f00-fe9ee3bf6264", + "speaker_order": 0, + "style_order": [ + 1, + 0, + 2 + ] + }, + { + "name": "B", + "styles": [ + { + "id": 3, + "name": "B_1" + } + ], + "version": "0.0.0", + "speaker_uuid": "f34ab151-c0f5-4e0a-9ad2-51ce30dba24d", + "speaker_order": 1, + "style_order": [ + 3 + ] + } + ]) + }); + + let input = &serde_json::from_value::<Vec<_>>(INPUT.clone())?; + let actual = serde_json::to_value(&super::merge(input))?; + + pretty_assertions::assert_eq!(*EXPECTED, actual); + Ok(()) + } +} From 9d2b1a2427f8dbccc397626d77befa2977bc7ab1 Mon Sep 17 00:00:00 2001 From: Ryo Yamashita <qryxip@gmail.com> Date: Sun, 28 Jan 2024 00:42:33 +0900 Subject: [PATCH 5/7] =?UTF-8?q?`merge`=E3=81=ABdoc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/metas.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/crates/voicevox_core/src/metas.rs b/crates/voicevox_core/src/metas.rs index 22426f5fe..8cd67c976 100644 --- a/crates/voicevox_core/src/metas.rs +++ b/crates/voicevox_core/src/metas.rs @@ -6,6 +6,15 
@@ use indexmap::{IndexMap, IndexSet}; use itertools::Itertools as _; use serde::{Deserialize, Serialize}; +/// [`speaker_uuid`]をキーとして複数の[`SpeakerMeta`]をマージする。 +/// +/// マージする際話者は[`speaker_order`]、スタイルは[`style_order`]をもとに安定ソートされる。 +/// `speaker_order`が無い話者と`style_order`に属さないスタイルは、そうでないものよりも後ろに +/// 置かれる。 +/// +/// [`speaker_uuid`]: SpeakerMeta::speaker_uuid +/// [`speaker_order`]: SpeakerMeta::speaker_order +/// [`style_order`]: SpeakerMeta::style_order pub fn merge<'a>(metas: impl IntoIterator<Item = &'a SpeakerMeta>) -> Vec<SpeakerMeta> { return metas .into_iter() From 25a918cdbb6ddb3c01f4b0a511195c81e5ad4902 Mon Sep 17 00:00:00 2001 From: Ryo Yamashita <qryxip@gmail.com> Date: Sun, 28 Jan 2024 00:56:31 +0900 Subject: [PATCH 6/7] =?UTF-8?q?=E8=A9=B1=E8=80=85=E6=83=85=E5=A0=B1?= =?UTF-8?q?=E3=81=8C=E9=A3=9F=E3=81=84=E9=81=95=E3=81=86=E3=82=84=E3=81=A4?= =?UTF-8?q?=E3=81=AFwarning=E6=AD=A2=E3=81=BE=E3=82=8A=E3=81=AB=E3=81=99?= =?UTF-8?q?=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/infer/status.rs | 14 ++++------ crates/voicevox_core/src/metas.rs | 34 +++++++++++++++++++++--- 2 files changed, 35 insertions(+), 13 deletions(-) diff --git a/crates/voicevox_core/src/infer/status.rs b/crates/voicevox_core/src/infer/status.rs index bef8889d3..db529e2e5 100644 --- a/crates/voicevox_core/src/infer/status.rs +++ b/crates/voicevox_core/src/infer/status.rs @@ -184,7 +184,6 @@ impl<R: InferenceRuntime, D: InferenceDomain> LoadedModels<R, D> { /// 次の場合にエラーを返す。 /// /// - 音声モデルIDかスタイルIDが`model_header`と重複するとき - /// - 同じ`speaker_uuid`で、スタイル以外でプロパティが異なっている話者があるとき fn ensure_acceptable(&self, model_header: &VoiceModelHeader) -> LoadModelResult<()> { let error = |context| LoadModelError { path: model_header.path.clone(), @@ -194,13 +193,10 @@ impl<R: InferenceRuntime, D: InferenceDomain> LoadedModels<R, D> { let loaded = self.speakers(); let external = model_header.metas.iter(); - if let 
Some((_loaded, _external)) = - iproduct!(loaded, external).find(|(loaded, external)| { - loaded.speaker_uuid() == external.speaker_uuid() - && !loaded.eq_except_styles(external) - }) - { - todo!("same `speaker_uuid` but different properties"); + for (loaded, external) in iproduct!(loaded, external) { + if loaded.speaker_uuid() == external.speaker_uuid() { + loaded.warn_diff_except_styles(external); + } } let loaded = self.styles(); @@ -252,7 +248,7 @@ impl<R: InferenceRuntime, D: InferenceDomain> LoadedModels<R, D> { Ok(()) } - fn speakers(&self) -> impl Iterator<Item = &SpeakerMeta> { + fn speakers(&self) -> impl Iterator<Item = &SpeakerMeta> + Clone { self.0.values().flat_map(|LoadedModel { metas, .. }| metas) } diff --git a/crates/voicevox_core/src/metas.rs b/crates/voicevox_core/src/metas.rs index 8cd67c976..ae47e1803 100644 --- a/crates/voicevox_core/src/metas.rs +++ b/crates/voicevox_core/src/metas.rs @@ -1,10 +1,11 @@ -use std::fmt::Display; +use std::fmt::{Debug, Display}; use derive_getters::Getters; use derive_new::new; use indexmap::{IndexMap, IndexSet}; use itertools::Itertools as _; use serde::{Deserialize, Serialize}; +use tracing::warn; /// [`speaker_uuid`]をキーとして複数の[`SpeakerMeta`]をマージする。 /// @@ -118,7 +119,10 @@ pub struct SpeakerMeta { } impl SpeakerMeta { - pub(crate) fn eq_except_styles(&self, other: &Self) -> bool { + /// # Panics + /// + /// `speaker_uuid`が異なるときパニックする。 + pub(crate) fn warn_diff_except_styles(&self, other: &Self) { let Self { name: name1, styles: _, @@ -137,8 +141,30 @@ impl SpeakerMeta { style_order: style_order2, } = other; - (name1, version1, speaker_uuid1, speaker_order1, style_order1) - == (name2, version2, speaker_uuid2, speaker_order2, style_order2) + if speaker_uuid1 != speaker_uuid2 { + panic!("must be equal: {speaker_uuid1} != {speaker_uuid2:?}"); + } + + warn_diff(speaker_uuid1, "name", name1, name2); + warn_diff(speaker_uuid1, "version", version1, version2); + warn_diff( + speaker_uuid1, + "speaker_order", + 
speaker_order1, + speaker_order2, + ); + warn_diff(speaker_uuid1, "style_order", style_order1, style_order2); + + fn warn_diff<T: PartialEq + Debug>( + speaker_uuid: &str, + field_name: &str, + left: &T, + right: &T, + ) { + if left != right { + warn!("`{speaker_uuid}`: different `{field_name}` ({left:?} != {right:?})"); + } + } } } From 40616781b590898efc7e9408c6c8dc189e751080 Mon Sep 17 00:00:00 2001 From: Ryo Yamashita <qryxip@gmail.com> Date: Wed, 7 Feb 2024 02:10:49 +0900 Subject: [PATCH 7/7] =?UTF-8?q?`StyleMeta`=E8=87=AA=E4=BD=93=E3=81=AB`orde?= =?UTF-8?q?r`=E3=82=92=E6=8C=81=E3=81=9F=E3=81=9B=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crates/voicevox_core/src/metas.rs | 104 +++++++----------- .../tests/e2e/snapshots.toml | 42 +++---- .../jp/hiroshiba/voicevoxcore/VoiceModel.java | 29 +++-- .../python/voicevox_core/_models.py | 18 +-- 4 files changed, 82 insertions(+), 111 deletions(-) diff --git a/crates/voicevox_core/src/metas.rs b/crates/voicevox_core/src/metas.rs index ae47e1803..78314d52a 100644 --- a/crates/voicevox_core/src/metas.rs +++ b/crates/voicevox_core/src/metas.rs @@ -2,20 +2,17 @@ use std::fmt::{Debug, Display}; use derive_getters::Getters; use derive_new::new; -use indexmap::{IndexMap, IndexSet}; +use indexmap::IndexMap; use itertools::Itertools as _; use serde::{Deserialize, Serialize}; use tracing::warn; /// [`speaker_uuid`]をキーとして複数の[`SpeakerMeta`]をマージする。 /// -/// マージする際話者は[`speaker_order`]、スタイルは[`style_order`]をもとに安定ソートされる。 -/// `speaker_order`が無い話者と`style_order`に属さないスタイルは、そうでないものよりも後ろに -/// 置かれる。 +/// マージする際話者は[`SpeakerMeta::order`]、スタイルは[`StyleMeta::order`]をもとに安定ソートされる。 +/// `order`が無い話者とスタイルは、そうでないものよりも後ろに置かれる。 /// /// [`speaker_uuid`]: SpeakerMeta::speaker_uuid -/// [`speaker_order`]: SpeakerMeta::speaker_order -/// [`style_order`]: SpeakerMeta::style_order pub fn merge<'a>(metas: impl IntoIterator<Item = &'a SpeakerMeta>) -> Vec<SpeakerMeta> { return metas .into_iter() @@ 
-27,14 +24,11 @@ pub fn merge<'a>(metas: impl IntoIterator<Item = &'a SpeakerMeta>) -> Vec<Speake }) .into_values() .update(|speaker| { - speaker.styles.sort_by_key(|StyleMeta { id, .. }| { - key(speaker - .style_order - .get_index_of(id) - .map(|i| i.try_into().unwrap())) - }); + speaker + .styles + .sort_by_key(|&StyleMeta { order, .. }| key(order)); }) - .sorted_by_key(|&SpeakerMeta { speaker_order, .. }| key(speaker_order)) + .sorted_by_key(|&SpeakerMeta { order, .. }| key(order)) .collect(); fn key(order: Option<u32>) -> impl Ord { @@ -108,14 +102,7 @@ pub struct SpeakerMeta { /// 話者の順番。 /// /// `SpeakerMeta`の列は、この値に対して昇順に並んでいるべきである。 - speaker_order: Option<u32>, - /// 話者に属するスタイルの順番。 - /// - /// [`styles`]はこの並びに沿うべきである。 - /// - /// [`styles`]: Self::styles - #[serde(default)] - style_order: IndexSet<StyleId>, + order: Option<u32>, } impl SpeakerMeta { @@ -128,8 +115,7 @@ impl SpeakerMeta { styles: _, version: version1, speaker_uuid: speaker_uuid1, - speaker_order: speaker_order1, - style_order: style_order1, + order: order1, } = self; let Self { @@ -137,8 +123,7 @@ impl SpeakerMeta { styles: _, version: version2, speaker_uuid: speaker_uuid2, - speaker_order: speaker_order2, - style_order: style_order2, + order: order2, } = other; if speaker_uuid1 != speaker_uuid2 { @@ -147,13 +132,7 @@ impl SpeakerMeta { warn_diff(speaker_uuid1, "name", name1, name2); warn_diff(speaker_uuid1, "version", version1, version2); - warn_diff( - speaker_uuid1, - "speaker_order", - speaker_order1, - speaker_order2, - ); - warn_diff(speaker_uuid1, "style_order", style_order1, style_order2); + warn_diff(speaker_uuid1, "order", order1, order2); fn warn_diff<T: PartialEq + Debug>( speaker_uuid: &str, @@ -175,6 +154,10 @@ pub struct StyleMeta { id: StyleId, /// スタイル名。 name: String, + /// スタイルの順番。 + /// + /// [`SpeakerMeta::styles`]は、この値に対して昇順に並んでいるべきである。 + order: Option<u32>, } #[cfg(test)] @@ -191,53 +174,44 @@ mod tests { "styles": [ { "id": 3, - "name": "B_1" + "name": "B_1", + "order": 
0 } ], "version": "0.0.0", "speaker_uuid": "f34ab151-c0f5-4e0a-9ad2-51ce30dba24d", - "speaker_order": 1, - "style_order": [ - 3 - ] + "order": 1 }, { "name": "A", "styles": [ { "id": 2, - "name": "A_3" + "name": "A_3", + "order": 2 } ], "version": "0.0.0", "speaker_uuid": "d6fd707c-a451-48e9-8f00-fe9ee3bf6264", - "speaker_order": 0, - "style_order": [ - 1, - 0, - 2 - ] + "order": 0 }, { "name": "A", "styles": [ { "id": 1, - "name": "A_1" + "name": "A_1", + "order": 0 }, { "id": 0, - "name": "A_2" + "name": "A_2", + "order": 1 } ], "version": "0.0.0", "speaker_uuid": "d6fd707c-a451-48e9-8f00-fe9ee3bf6264", - "speaker_order": 0, - "style_order": [ - 1, - 0, - 2 - ] + "order": 0 } ]) }); @@ -249,46 +223,42 @@ mod tests { "styles": [ { "id": 1, - "name": "A_1" + "name": "A_1", + "order": 0 }, { "id": 0, - "name": "A_2" + "name": "A_2", + "order": 1 }, { "id": 2, - "name": "A_3" + "name": "A_3", + "order": 2 } ], "version": "0.0.0", "speaker_uuid": "d6fd707c-a451-48e9-8f00-fe9ee3bf6264", - "speaker_order": 0, - "style_order": [ - 1, - 0, - 2 - ] + "order": 0 }, { "name": "B", "styles": [ { "id": 3, - "name": "B_1" + "name": "B_1", + "order": 0 } ], "version": "0.0.0", "speaker_uuid": "f34ab151-c0f5-4e0a-9ad2-51ce30dba24d", - "speaker_order": 1, - "style_order": [ - 3 - ] + "order": 1 } ]) }); let input = &serde_json::from_value::<Vec<_>>(INPUT.clone())?; - let actual = serde_json::to_value(&super::merge(input))?; + let actual = serde_json::to_value(super::merge(input))?; pretty_assertions::assert_eq!(*EXPECTED, actual); Ok(()) diff --git a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml index d0e80f97f..25926487e 100644 --- a/crates/voicevox_core_c_api/tests/e2e/snapshots.toml +++ b/crates/voicevox_core_c_api/tests/e2e/snapshots.toml @@ -6,43 +6,44 @@ metas = ''' "styles": [ { "id": 0, - "name": "style1" + "name": "style1", + "order": null } ], "version": "0.0.1", "speaker_uuid": 
"574bc678-8370-44be-b941-08e46e7b47d7", - "speaker_order": null, - "style_order": [] + "order": null }, { "name": "dummy2", "styles": [ { "id": 1, - "name": "style2" + "name": "style2", + "order": null } ], "version": "0.0.1", "speaker_uuid": "dd9ccd75-75f6-40ce-a3db-960cbed2e905", - "speaker_order": null, - "style_order": [] + "order": null }, { "name": "dummy3", "styles": [ { "id": 302, - "name": "style3-1" + "name": "style3-1", + "order": null }, { "id": 303, - "name": "style3-2" + "name": "style3-2", + "order": null } ], "version": "0.0.1", "speaker_uuid": "5d3d9aa9-88e5-4a96-8ef7-f13a3cad1cb3", - "speaker_order": null, - "style_order": [] + "order": null } ]''' stderr.windows = ''' @@ -95,43 +96,44 @@ metas = ''' "styles": [ { "id": 0, - "name": "style1" + "name": "style1", + "order": null } ], "version": "0.0.1", "speaker_uuid": "574bc678-8370-44be-b941-08e46e7b47d7", - "speaker_order": null, - "style_order": [] + "order": null }, { "name": "dummy2", "styles": [ { "id": 1, - "name": "style2" + "name": "style2", + "order": null } ], "version": "0.0.1", "speaker_uuid": "dd9ccd75-75f6-40ce-a3db-960cbed2e905", - "speaker_order": null, - "style_order": [] + "order": null }, { "name": "dummy3", "styles": [ { "id": 302, - "name": "style3-1" + "name": "style3-1", + "order": null }, { "id": 303, - "name": "style3-2" + "name": "style3-2", + "order": null } ], "version": "0.0.1", "speaker_uuid": "5d3d9aa9-88e5-4a96-8ef7-f13a3cad1cb3", - "speaker_order": null, - "style_order": [] + "order": null } ]''' stderr.windows = ''' diff --git a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java index 0e5715e09..ba4881566 100644 --- a/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java +++ b/crates/voicevox_core_java_api/lib/src/main/java/jp/hiroshiba/voicevoxcore/VoiceModel.java @@ -5,7 +5,6 @@ import 
com.google.gson.annotations.SerializedName; import jakarta.annotation.Nonnull; import jakarta.annotation.Nullable; -import java.util.LinkedHashSet; /** 音声モデル。 */ public class VoiceModel extends Dll { @@ -75,20 +74,10 @@ public static class SpeakerMeta { * * <p>{@code SpeakerMeta}の列は、この値に対して昇順に並んでいるべきである。 */ - @SerializedName("speaker_order") + @SerializedName("order") @Expose @Nullable - public final Integer speakerOrder; - - /** - * 話者に属するスタイルの順番。 - * - * <p>{@link #styles}はこの並びに沿うべきである。 - */ - @SerializedName("style_order") - @Expose - @Nonnull - public final LinkedHashSet<Integer> styleOrder; + public final Integer order; private SpeakerMeta() { // GSONからコンストラクトするため、このメソッドは呼ばれることは無い。 @@ -97,8 +86,7 @@ private SpeakerMeta() { this.styles = new StyleMeta[0]; this.speakerUuid = ""; this.version = ""; - this.speakerOrder = null; - this.styleOrder = new LinkedHashSet<>(); + this.order = null; } } @@ -115,9 +103,20 @@ public static class StyleMeta { @Expose public final int id; + /** + * スタイルの順番。 + * + * <p>{@link SpeakerMeta#styles}の列は、この値に対して昇順に並んでいるべきである。 + */ + @SerializedName("order") + @Expose + @Nullable + public final Integer order; + private StyleMeta() { this.name = ""; this.id = 0; + this.order = null; } } } diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_models.py b/crates/voicevox_core_python_api/python/voicevox_core/_models.py index 9e11e8bd8..c72bbcbf0 100644 --- a/crates/voicevox_core_python_api/python/voicevox_core/_models.py +++ b/crates/voicevox_core_python_api/python/voicevox_core/_models.py @@ -1,6 +1,6 @@ import dataclasses from enum import Enum -from typing import Dict, List, NewType, Optional +from typing import List, NewType, Optional import pydantic @@ -44,6 +44,13 @@ class StyleMeta: id: StyleId """スタイルID。""" + order: Optional[int] = None + """ + スタイルの順番。 + + :attr:`SpeakerMeta.styles` は、この値に対して昇順に並んでいるべきである。 + """ + @pydantic.dataclasses.dataclass class SpeakerMeta: @@ -61,20 +68,13 @@ class SpeakerMeta: version: StyleVersion
"""話者のUUID。""" - speaker_order: Optional[int] = None + order: Optional[int] = None """ 話者の順番。 ``SpeakerMeta`` の列は、この値に対して昇順に並んでいるべきである。 """ - style_order: List[StyleId] = dataclasses.field(default_factory=list) - """ - 話者に属するスタイルの順番。 - - :attr:`styles` はこの並びに沿うべきである。 - """ - @pydantic.dataclasses.dataclass class SupportedDevices: