Make the talk models contained in a VVM optional
qryxip committed Mar 7, 2024
1 parent 0848630 commit ca6ce4a
Showing 6 changed files with 63 additions and 33 deletions.
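
In short: `VoiceModel::read_inference_models` now returns `LoadModelResult<Option<EnumMap<…>>>` instead of `LoadModelResult<EnumMap<…>>`, so a VVM whose manifest lists no talk models can still be opened and loaded. The following is a standalone sketch of that shape using stand-in types (not the crate's actual API):

```rust
// Standalone sketch of the new return shape (stand-in types; the real code
// uses LoadModelResult, EnumMap, and a VVM entry reader).
struct TalkModelFilenames {
    predict_duration: String,
    predict_intonation: String,
    decode: String,
}

struct Manifest {
    // Previously three mandatory `*_filename` fields; now one optional group.
    talk_model_filenames: Option<TalkModelFilenames>,
}

// Stand-in for reading one entry out of the VVM archive.
fn read_entry(filename: &str) -> Result<Vec<u8>, String> {
    Ok(filename.as_bytes().to_vec())
}

// `Err(_)` still means an I/O or parse failure; `Ok(None)` now means
// "this VVM simply has no talk models", which callers treat as a no-op.
fn read_inference_models(manifest: &Manifest) -> Result<Option<[Vec<u8>; 3]>, String> {
    manifest
        .talk_model_filenames
        .as_ref()
        .map(|f| {
            Ok([
                read_entry(&f.predict_duration)?,
                read_entry(&f.predict_intonation)?,
                read_entry(&f.decode)?,
            ])
        })
        .transpose() // Option<Result<T, E>> -> Result<Option<T>, E>
}

fn main() {
    let manifest = Manifest { talk_model_filenames: None };
    assert!(read_inference_models(&manifest).unwrap().is_none());
}
```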
4 changes: 2 additions & 2 deletions crates/voicevox_core/src/infer/status.rs
@@ -400,7 +400,7 @@ mod tests {
             enum_map!(_ => InferenceSessionOptions::new(0, false)),
         );
         let model = &open_default_vvm_file().await;
-        let model_bytes = &model.read_inference_models().await.unwrap();
+        let model_bytes = &model.read_inference_models().await.unwrap().unwrap();
         let result = status.insert_model(model.header(), model_bytes);
         assert_debug_fmt_eq!(Ok(()), result);
         assert_eq!(1, status.loaded_models.lock().unwrap().0.len());
@@ -414,7 +414,7 @@ mod tests {
         );
         let vvm = open_default_vvm_file().await;
         let model_header = vvm.header();
-        let model_bytes = &vvm.read_inference_models().await.unwrap();
+        let model_bytes = &vvm.read_inference_models().await.unwrap().unwrap();
         assert!(
             !status.is_loaded_model(&model_header.id),
             "model should not be loaded"
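
The double `unwrap` in these tests reflects the new nesting: the first peels the `LoadModelResult`, the second the `Option` (the default sample VVM does contain talk models, so `Some` is expected there). A tiny illustration with hypothetical types:

```rust
// Hypothetical illustration of the Result<Option<_>> nesting in the tests.
fn read(has_talk_models: bool) -> Result<Option<Vec<u8>>, ()> {
    Ok(has_talk_models.then(|| vec![0x01, 0x02]))
}

fn main() {
    // First `unwrap` peels the Result, the second peels the Option.
    let bytes = read(true).unwrap().unwrap();
    assert_eq!(bytes, [0x01, 0x02]);
    assert!(read(false).unwrap().is_none());
}
```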
11 changes: 8 additions & 3 deletions crates/voicevox_core/src/manifest.rs
@@ -41,9 +41,14 @@ pub struct Manifest {
     #[allow(dead_code)]
     manifest_version: ManifestVersion,
     metas_filename: String,
-    decode_filename: String,
-    predict_duration_filename: String,
-    predict_intonation_filename: String,
+    talk_model_filenames: Option<TalkModelFilenames>,
     #[serde(default)]
     style_id_to_model_inner_id: BTreeMap<StyleId, ModelInnerId>,
 }
+
+#[derive(Deserialize, Clone)]
+pub(crate) struct TalkModelFilenames {
+    pub(crate) predict_duration: String,
+    pub(crate) predict_intonation: String,
+    pub(crate) decode: String,
+}
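
Because the new field is an `Option`, serde treats a missing `talk_model_filenames` key as `None` with no extra attribute needed, so existing manifests keep parsing. A sketch of that behavior, assuming `serde`/`serde_json` and a trimmed-down field set (the real `Manifest` has more fields and newtype wrappers):

```rust
use serde::Deserialize;

#[derive(Deserialize)]
struct TalkModelFilenames {
    predict_duration: String,
    predict_intonation: String,
    decode: String,
}

#[derive(Deserialize)]
struct Manifest {
    metas_filename: String,
    // `Option` fields deserialize to `None` when the key is absent,
    // so pre-existing manifests without talk models still parse.
    talk_model_filenames: Option<TalkModelFilenames>,
}

fn main() -> serde_json::Result<()> {
    let with_talk: Manifest = serde_json::from_str(
        r#"{
            "metas_filename": "metas.json",
            "talk_model_filenames": {
                "predict_duration": "predict_duration.onnx",
                "predict_intonation": "predict_intonation.onnx",
                "decode": "decode.onnx"
            }
        }"#,
    )?;
    let without_talk: Manifest = serde_json::from_str(r#"{"metas_filename": "metas.json"}"#)?;

    assert_eq!(
        with_talk.talk_model_filenames.as_ref().map(|f| f.decode.as_str()),
        Some("decode.onnx"),
    );
    assert!(without_talk.talk_model_filenames.is_none());
    Ok(())
}
```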
12 changes: 8 additions & 4 deletions crates/voicevox_core/src/synthesizer.rs
@@ -201,8 +201,10 @@ pub(crate) mod blocking {

         /// Loads a voice model.
         pub fn load_voice_model(&self, model: &crate::blocking::VoiceModel) -> Result<()> {
-            let model_bytes = &model.read_inference_models()?;
-            self.status.insert_model(model.header(), model_bytes)
+            if let Some(model_bytes) = model.read_inference_models()? {
+                self.status.insert_model(model.header(), &model_bytes)?;
+            }
+            Ok(())
         }
 
         /// Unloads a voice model.
@@ -1157,8 +1159,10 @@ pub(crate) mod tokio {
         }
 
         pub async fn load_voice_model(&self, model: &crate::tokio::VoiceModel) -> Result<()> {
-            let model_bytes = &model.read_inference_models().await?;
-            self.0.status.insert_model(model.header(), model_bytes)
+            if let Some(model_bytes) = model.read_inference_models().await? {
+                self.0.status.insert_model(model.header(), &model_bytes)?;
+            }
+            Ok(())
         }
 
         pub fn unload_voice_model(&self, voice_model_id: &VoiceModelId) -> Result<()> {
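
Behaviorally, `load_voice_model` changes from always inserting to inserting only when talk models are present: a VVM without them now loads as a successful no-op instead of failing at read time. A condensed sketch of the control flow with stand-in types:

```rust
// Condensed control flow of the new load_voice_model (stand-in types).
fn insert_model(_bytes: &[u8]) -> Result<(), String> {
    Ok(())
}

fn load_voice_model(model_bytes: Option<Vec<u8>>) -> Result<(), String> {
    // Before: the read itself failed on a VVM without talk models.
    // After: absence is not an error; loading simply does nothing.
    if let Some(bytes) = model_bytes {
        insert_model(&bytes)?;
    }
    Ok(())
}

fn main() {
    assert!(load_voice_model(Some(vec![1, 2, 3])).is_ok());
    assert!(load_voice_model(None).is_ok()); // no talk models: still Ok
}
```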
8 changes: 5 additions & 3 deletions manifest.json
@@ -1,9 +1,11 @@
 {
   "manifest_version": "0.0.0",
   "metas_filename": "metas.json",
-  "decode_filename": "decode.onnx",
-  "predict_duration_filename": "predict_duration.onnx",
-  "predict_intonation_filename": "predict_intonation.onnx",
+  "talk_model_filenames": {
+    "predict_duration": "predict_duration.onnx",
+    "predict_intonation": "predict_intonation.onnx",
+    "decode": "decode.onnx"
+  },
   "style_id_to_model_inner_id": {
     "302": 2,
     "303": 3
61 changes: 40 additions & 21 deletions crates/voicevox_core/src/voice_model.rs
@@ -82,7 +82,7 @@ pub(crate) mod blocking {
     use crate::{
         error::{LoadModelError, LoadModelErrorKind, LoadModelResult},
         infer::domain::InferenceOperationImpl,
-        manifest::Manifest,
+        manifest::{Manifest, TalkModelFilenames},
         VoiceModelMeta,
     };

@@ -99,21 +99,30 @@ pub(crate) mod blocking {
     impl self::VoiceModel {
         pub(crate) fn read_inference_models(
             &self,
-        ) -> LoadModelResult<EnumMap<InferenceOperationImpl, Vec<u8>>> {
+        ) -> LoadModelResult<Option<EnumMap<InferenceOperationImpl, Vec<u8>>>> {
             let reader = BlockingVvmEntryReader::open(&self.header.path)?;
 
-            let model_bytes = [
-                self.header.manifest.predict_duration_filename(),
-                self.header.manifest.predict_intonation_filename(),
-                self.header.manifest.decode_filename(),
-            ]
-            .into_par_iter()
-            .map(|filename| reader.read_vvm_entry(filename))
-            .collect::<std::result::Result<Vec<_>, _>>()?
-            .try_into()
-            .unwrap_or_else(|_| panic!("should be same length"));
-
-            Ok(EnumMap::from_array(model_bytes))
+            self.header
+                .manifest
+                .talk_model_filenames()
+                .as_ref()
+                .map(
+                    |TalkModelFilenames {
+                         predict_duration,
+                         predict_intonation,
+                         decode,
+                     }| {
+                        let model_bytes = [predict_duration, predict_intonation, decode]
+                            .into_par_iter()
+                            .map(|filename| reader.read_vvm_entry(filename))
+                            .collect::<std::result::Result<Vec<_>, _>>()?
+                            .try_into()
+                            .unwrap_or_else(|_| panic!("should be same length"));
+
+                        Ok(EnumMap::from_array(model_bytes))
+                    },
+                )
+                .transpose()
         }

         /// Constructs a `VoiceModel` from a VVM file.
@@ -211,7 +220,7 @@ pub(crate) mod tokio {
     use crate::{
         error::{LoadModelError, LoadModelErrorKind, LoadModelResult},
         infer::domain::InferenceOperationImpl,
-        manifest::Manifest,
+        manifest::{Manifest, TalkModelFilenames},
         Result, VoiceModelMeta,
     };

@@ -228,24 +237,34 @@ pub(crate) mod tokio {
     impl self::VoiceModel {
         pub(crate) async fn read_inference_models(
             &self,
-        ) -> LoadModelResult<EnumMap<InferenceOperationImpl, Vec<u8>>> {
+        ) -> LoadModelResult<Option<EnumMap<InferenceOperationImpl, Vec<u8>>>> {
             let reader = AsyncVvmEntryReader::open(&self.header.path).await?;
 
+            let Some(TalkModelFilenames {
+                predict_duration,
+                predict_intonation,
+                decode,
+            }) = self.header.manifest.talk_model_filenames()
+            else {
+                return Ok(None);
+            };
+
             let (
                 decode_model_result,
                 predict_duration_model_result,
                 predict_intonation_model_result,
             ) = join3(
-                reader.read_vvm_entry(self.header.manifest.decode_filename()),
-                reader.read_vvm_entry(self.header.manifest.predict_duration_filename()),
-                reader.read_vvm_entry(self.header.manifest.predict_intonation_filename()),
+                reader.read_vvm_entry(decode),
+                reader.read_vvm_entry(predict_duration),
+                reader.read_vvm_entry(predict_intonation),
             )
             .await;
 
-            Ok(EnumMap::from_array([
+            Ok(Some(EnumMap::from_array([
                 predict_duration_model_result?,
                 predict_intonation_model_result?,
                 decode_model_result?,
-            ]))
+            ])))
         }
         /// Constructs a `VoiceModel` from a VVM file.
         pub async fn from_path(path: impl AsRef<Path>) -> Result<Self> {
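
The two implementations express the same `Option`/`Result` plumbing in two styles: the blocking path maps over the `Option` and flips the nesting with `.transpose()`, while the tokio path uses a `let ... else` early return, which keeps the happy path flat when the body needs to `.await` (a closure would get in the way there). A sketch of both with sync stand-ins:

```rust
// The same Option/Result plumbing in both styles (sync stand-ins).
struct Filenames {
    decode: String,
}

fn read_entry(name: &str) -> Result<Vec<u8>, String> {
    Ok(name.as_bytes().to_vec())
}

// Blocking style: map over the Option, then flip the nesting.
fn read_via_transpose(f: Option<&Filenames>) -> Result<Option<Vec<u8>>, String> {
    f.map(|f| read_entry(&f.decode)).transpose()
}

// Tokio style: `let ... else` early return (stable since Rust 1.65).
fn read_via_let_else(f: Option<&Filenames>) -> Result<Option<Vec<u8>>, String> {
    let Some(Filenames { decode }) = f else {
        return Ok(None); // this VVM has no talk models
    };
    Ok(Some(read_entry(decode)?))
}

fn main() {
    let f = Filenames { decode: "decode.onnx".to_owned() };
    assert_eq!(
        read_via_transpose(Some(&f)).unwrap(),
        read_via_let_else(Some(&f)).unwrap(),
    );
    assert!(read_via_transpose(None).unwrap().is_none());
}
```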
Binary file modified model/sample.vvm
