Skip to content

Commit

Permalink
[project-s] ダミーモデルの追加とテストの有効化 (#736)
Browse files Browse the repository at this point in the history
* add models

* remove comment out

* fix merge miss

* add tests to status

* add onnx file to typos exclude files

* rename model file

* fmt

---------

Co-authored-by: Hiroshiba <[email protected]>
  • Loading branch information
y-chan and Hiroshiba authored Jan 28, 2024
1 parent 01c3a9b commit ca585b2
Show file tree
Hide file tree
Showing 8 changed files with 177 additions and 121 deletions.
2 changes: 1 addition & 1 deletion _typos.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ NdArray="NdArray" # onnxruntime::session::NdArray
[default.extend-words]

[files]
extend-exclude = ["*.svg"]
extend-exclude = ["*.svg", "*.onnx"]
214 changes: 107 additions & 107 deletions crates/voicevox_core/src/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1210,113 +1210,113 @@ mod tests {
assert_eq!(result.unwrap().len(), F0_LENGTH * 256);
}

// #[rstest]
// fn predict_sing_f0_works() {
// let internal = VoicevoxCore::new_with_mutex();
// internal
// .lock()
// .unwrap()
// .initialize(InitializeOptions {
// load_all_models: true,
// acceleration_mode: AccelerationMode::Cpu,
// ..Default::default()
// })
// .unwrap();

// // 「テスト」という文章に対応する入力
// let phoneme_vector = [0, 37, 14, 35, 6, 37, 30, 0];
// let note_vector = [0, 30, 30, 40, 40, 50, 50, 0];

// let sing_teacher_speaker_id = 6000;
// let result = internal.lock().unwrap().predict_sing_f0(
// &phoneme_vector,
// &note_vector,
// sing_teacher_speaker_id,
// );

// assert!(result.is_ok(), "{result:?}");
// assert_eq!(result.unwrap().len(), phoneme_vector.len());
// }

// #[rstest]
// fn predict_sing_volume_works() {
// let internal = VoicevoxCore::new_with_mutex();
// internal
// .lock()
// .unwrap()
// .initialize(InitializeOptions {
// load_all_models: true,
// acceleration_mode: AccelerationMode::Cpu,
// ..Default::default()
// })
// .unwrap();

// // 「テスト」という文章に対応する入力
// let phoneme_vector = [0, 37, 14, 35, 6, 37, 30, 0];
// let note_vector = [0, 30, 30, 40, 40, 50, 50, 0];
// let f0_vector = [0., 5.905218, 5.905218, 0., 0., 5.565851, 5.565851, 0.];

// let sing_teacher_speaker_id = 6000;
// let result = internal.lock().unwrap().predict_sing_volume(
// &phoneme_vector,
// &note_vector,
// &f0_vector,
// sing_teacher_speaker_id,
// );

// assert!(result.is_ok(), "{result:?}");
// assert_eq!(result.unwrap().len(), phoneme_vector.len());
// }

// #[rstest]
// fn sf_decode_works() {
// let internal = VoicevoxCore::new_with_mutex();
// internal
// .lock()
// .unwrap()
// .initialize(InitializeOptions {
// acceleration_mode: AccelerationMode::Cpu,
// load_all_models: true,
// ..Default::default()
// })
// .unwrap();

// // 「テスト」という文章に対応する入力
// const F0_LENGTH: usize = 69;
// let mut f0 = [0.; F0_LENGTH];
// f0[9..24].fill(5.905218);
// f0[37..60].fill(5.565851);

// let mut volume = [0.; F0_LENGTH];
// volume[9..24].fill(0.5);
// volume[24..37].fill(0.2);
// volume[37..60].fill(1.0);

// let mut phoneme = [0; F0_LENGTH];
// let mut set_one = |index, range| {
// for i in range {
// phoneme[i] = index;
// }
// };
// set_one(0, 0..9);
// set_one(37, 9..13);
// set_one(14, 13..24);
// set_one(35, 24..30);
// set_one(6, 30..37);
// set_one(37, 37..45);
// set_one(30, 45..60);
// set_one(0, 60..69);

// let sf_decoder_speaker_id = 3000;
// let result =
// internal
// .lock()
// .unwrap()
// .sf_decode(&phoneme, &f0, &volume, sf_decoder_speaker_id);

// assert!(result.is_ok(), "{result:?}");
// assert_eq!(result.unwrap().len(), F0_LENGTH * 256);
// }
/// Smoke test for `predict_sing_f0`: after initializing with all models loaded
/// on the CPU, the call must succeed and return one value per input phoneme.
#[rstest]
fn predict_sing_f0_works() {
    let core = VoicevoxCore::new_with_mutex();
    let options = InitializeOptions {
        load_all_models: true,
        acceleration_mode: AccelerationMode::Cpu,
        ..Default::default()
    };
    core.lock().unwrap().initialize(options).unwrap();

    // Input corresponding to the text "テスト" ("test").
    let phoneme_vector = [0, 37, 14, 35, 6, 37, 30, 0];
    let note_vector = [0, 30, 30, 40, 40, 50, 50, 0];
    let sing_teacher_speaker_id = 6000;

    let result = {
        // The lock guard is released as soon as the prediction returns.
        let mut guard = core.lock().unwrap();
        guard.predict_sing_f0(&phoneme_vector, &note_vector, sing_teacher_speaker_id)
    };

    assert!(result.is_ok(), "{result:?}");
    let f0 = result.unwrap();
    assert_eq!(f0.len(), phoneme_vector.len());
}

/// Smoke test for `predict_sing_volume`: after initializing with all models
/// loaded on the CPU, the call must succeed and return one value per input
/// phoneme.
#[rstest]
fn predict_sing_volume_works() {
    let core = VoicevoxCore::new_with_mutex();
    let options = InitializeOptions {
        load_all_models: true,
        acceleration_mode: AccelerationMode::Cpu,
        ..Default::default()
    };
    core.lock().unwrap().initialize(options).unwrap();

    // Input corresponding to the text "テスト" ("test").
    let phoneme_vector = [0, 37, 14, 35, 6, 37, 30, 0];
    let note_vector = [0, 30, 30, 40, 40, 50, 50, 0];
    let f0_vector = [0., 5.905218, 5.905218, 0., 0., 5.565851, 5.565851, 0.];
    let sing_teacher_speaker_id = 6000;

    let result = {
        // The lock guard is released as soon as the prediction returns.
        let mut guard = core.lock().unwrap();
        guard.predict_sing_volume(
            &phoneme_vector,
            &note_vector,
            &f0_vector,
            sing_teacher_speaker_id,
        )
    };

    assert!(result.is_ok(), "{result:?}");
    let volume = result.unwrap();
    assert_eq!(volume.len(), phoneme_vector.len());
}

/// Smoke test for `sf_decode`: after initializing with all models loaded on the
/// CPU, decoding frame-level phoneme / f0 / volume inputs must succeed and
/// return `F0_LENGTH * 256` values.
#[rstest]
fn sf_decode_works() {
    let core = VoicevoxCore::new_with_mutex();
    let options = InitializeOptions {
        acceleration_mode: AccelerationMode::Cpu,
        load_all_models: true,
        ..Default::default()
    };
    core.lock().unwrap().initialize(options).unwrap();

    // Input corresponding to the text "テスト" ("test").
    const F0_LENGTH: usize = 69;

    let mut f0 = [0.; F0_LENGTH];
    f0[9..24].fill(5.905218);
    f0[37..60].fill(5.565851);

    let mut volume = [0.; F0_LENGTH];
    volume[9..24].fill(0.5);
    volume[24..37].fill(0.2);
    volume[37..60].fill(1.0);

    // Each contiguous frame range carries a single phoneme id.
    let mut phoneme = [0; F0_LENGTH];
    phoneme[0..9].fill(0);
    phoneme[9..13].fill(37);
    phoneme[13..24].fill(14);
    phoneme[24..30].fill(35);
    phoneme[30..37].fill(6);
    phoneme[37..45].fill(37);
    phoneme[45..60].fill(30);
    phoneme[60..69].fill(0);

    let sf_decoder_speaker_id = 3000;
    let result = {
        // The lock guard is released as soon as decoding returns.
        let mut guard = core.lock().unwrap();
        guard.sf_decode(&phoneme, &f0, &volume, sf_decoder_speaker_id)
    };

    assert!(result.is_ok(), "{result:?}");
    let decoded = result.unwrap();
    assert_eq!(decoded.len(), F0_LENGTH * 256);
}

/// Unsized slice of `(entries, n)` pairs: `entries` is a `'static` slice of
/// string triples and `n` is a `usize`.
// NOTE(review): presumably fixture data for the text-analysis tests (the name
// suggests text / consonant / vowel triples) — confirm against the usages.
type TextConsonantVowelData =
[(&'static [(&'static str, &'static str, &'static str)], usize)];
Expand Down
69 changes: 65 additions & 4 deletions crates/voicevox_core/src/status.rs
Original file line number Diff line number Diff line change
Expand Up @@ -619,14 +619,21 @@ mod tests {
assert!(status.talk_models.predict_duration.is_empty());
assert!(status.talk_models.predict_intonation.is_empty());
assert!(status.talk_models.decode.is_empty());
assert!(status
.sing_teacher_models
.predict_sing_consonant_length
.is_empty());
assert!(status.sing_teacher_models.predict_sing_f0.is_empty());
assert!(status.sing_teacher_models.predict_sing_volume.is_empty());
assert!(status.sf_decode_models.sf_decode.is_empty());
assert!(status.supported_styles.is_empty());
}

/// `load_metas` on a freshly created status must succeed and leave exactly the
/// style IDs listed below in `supported_styles`.
#[rstest]
fn status_load_metas_works() {
    let mut status = Status::new(true, 0);
    let result = status.load_metas();
    // A single equality assertion on the result, as in the sibling status tests;
    // asserting the same value twice with two different macros was redundant.
    assert_eq!(Ok(()), result);
    let expected = BTreeSet::from([0, 1, 2, 3, 3000, 6000]);
    assert_eq!(expected, status.supported_styles);
}
Expand All @@ -642,7 +649,7 @@ mod tests {
fn status_load_talk_model_works() {
let mut status = Status::new(false, 0);
let result = status.load_talk_model(0);
assert_debug_fmt_eq!(Ok(()), result);
assert_eq!(Ok(()), result);
assert_eq!(1, status.talk_models.predict_duration.len());
assert_eq!(1, status.talk_models.predict_intonation.len());
assert_eq!(1, status.talk_models.decode.len());
Expand All @@ -657,12 +664,66 @@ mod tests {
"model should not be loaded"
);
let result = status.load_talk_model(model_index);
assert_debug_fmt_eq!(Ok(()), result);
assert_eq!(Ok(()), result);
assert!(
status.is_talk_model_loaded(model_index),
"model should be loaded"
);
}

// TODO: sing系のテスト足す
/// Loading sing-teacher model 0 into an empty status must succeed and leave
/// exactly one entry in each of the three sing-teacher model collections.
#[rstest]
fn status_load_sing_teacher_model_works() {
    let mut status = Status::new(false, 0);
    assert_eq!(Ok(()), status.load_sing_teacher_model(0));

    let models = &status.sing_teacher_models;
    assert_eq!(1, models.predict_sing_consonant_length.len());
    assert_eq!(1, models.predict_sing_f0.len());
    assert_eq!(1, models.predict_sing_volume.len());
}

/// `is_sing_teacher_model_loaded` must report `false` before the model is
/// loaded and `true` after a successful `load_sing_teacher_model`.
#[rstest]
fn status_is_sing_teacher_model_loaded_works() {
    let model_index = 0;
    let mut status = Status::new(false, 0);

    let loaded_before = status.is_sing_teacher_model_loaded(model_index);
    assert!(!loaded_before, "model should not be loaded");

    assert_eq!(Ok(()), status.load_sing_teacher_model(model_index));

    let loaded_after = status.is_sing_teacher_model_loaded(model_index);
    assert!(loaded_after, "model should be loaded");
}

/// Loading sf-decode model 0 into an empty status must succeed and leave
/// exactly one entry in the sf-decode model collection.
#[rstest]
fn status_load_sf_decode_model_works() {
    let mut status = Status::new(false, 0);
    assert_eq!(Ok(()), status.load_sf_decode_model(0));

    let loaded_count = status.sf_decode_models.sf_decode.len();
    assert_eq!(1, loaded_count);
}

/// `is_sf_decode_model_loaded` must report `false` before the model is loaded
/// and `true` after a successful `load_sf_decode_model`.
#[rstest]
fn status_is_sf_decode_model_loaded_works() {
    let model_index = 0;
    let mut status = Status::new(false, 0);

    let loaded_before = status.is_sf_decode_model_loaded(model_index);
    assert!(!loaded_before, "model should not be loaded");

    assert_eq!(Ok(()), status.load_sf_decode_model(model_index));

    let loaded_after = status.is_sf_decode_model_loaded(model_index);
    assert!(loaded_after, "model should be loaded");
}
}
13 changes: 4 additions & 9 deletions crates/voicevox_core/src/status/model_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,23 +14,18 @@ pub(super) const TALK_MODEL_FILE_NAMES: &[TalkModelFileNames] = &[TalkModelFileN
decode_model: "decode-0.onnx",
}];

// TODO: 変更する
/// Lookup table for sing-teacher speaker IDs; currently the single entry maps
/// ID `6000` to the pair `(0, 0)`.
// NOTE(review): the pair is presumably (model index, speaker ID inside that
// model) — confirm against the code that consumes this table.
pub(super) const SING_TEACHER_SPEAKER_ID_MAP: &[(u32, (usize, u32))] = &[(6000, (0, 0))];

/// ONNX file names for each sing-teacher model set (consonant length, f0 and
/// volume predictors). The names match the dedicated sing model files under
/// `model/`.
pub(super) const SING_TEACHER_MODEL_FILE_NAMES: &[SingTeacherModelFileNames] =
    &[SingTeacherModelFileNames {
        // Each field is initialised exactly once; a struct literal may not
        // name the same field twice.
        predict_sing_consonant_length_model: "predict_sing_consonant_length-0.onnx",
        predict_sing_f0_model: "predict_sing_f0-0.onnx",
        predict_sing_volume_model: "predict_sing_volume-0.onnx",
    }];

/// Lookup table for sf-decode speaker IDs; currently the single entry maps ID
/// `3000` to the pair `(0, 0)`.
// NOTE(review): the pair is presumably (model index, speaker ID inside that
// model) — confirm against the code that consumes this table.
pub(super) const SF_DECODE_SPEAKER_ID_MAP: &[(u32, (usize, u32))] = &[(3000, (0, 0))];

/// ONNX file name for each sf-decode model. The name matches the dedicated
/// `sf_decode-0.onnx` model file under `model/`.
pub(super) const SF_DECODE_MODEL_FILE_NAMES: &[SfDecodeModelFileNames] =
    &[SfDecodeModelFileNames {
        // The field is initialised exactly once; a struct literal may not name
        // the same field twice.
        sf_decode_model: "sf_decode-0.onnx",
    }];
Binary file added model/predict_sing_consonant_length-0.onnx
Binary file not shown.
Binary file added model/predict_sing_f0-0.onnx
Binary file not shown.
Binary file added model/predict_sing_volume-0.onnx
Binary file not shown.
Binary file added model/sf_decode-0.onnx
Binary file not shown.

0 comments on commit ca585b2

Please sign in to comment.