[project-s] ダミーモデルの追加とテストの有効化 (#736)

* add models
* remove comment out
* fix merge miss
* add tests to status
* add onnx file to typos exclude files
* rename model file
* fmt

---------

Co-authored-by: Hiroshiba <[email protected]>
VOICEVOX · Jan 28, 2024 · ca585b2 · ca585b2
1 parent 01c3a9b
commit ca585b2
Show file tree

Hide file tree

Showing 8 changed files with 177 additions and 121 deletions.
diff --git a/_typos.toml b/_typos.toml
@@ -8,4 +8,4 @@ NdArray="NdArray" # onnxruntime::session::NdArray
 [default.extend-words]
 
 [files]
-extend-exclude = ["*.svg"]
+extend-exclude = ["*.svg", "*.onnx"]
diff --git a/crates/voicevox_core/src/publish.rs b/crates/voicevox_core/src/publish.rs
@@ -1210,113 +1210,113 @@ mod tests {
         assert_eq!(result.unwrap().len(), F0_LENGTH * 256);
     }
 
-    // #[rstest]
-    // fn predict_sing_f0_works() {
-    //     let internal = VoicevoxCore::new_with_mutex();
-    //     internal
-    //         .lock()
-    //         .unwrap()
-    //         .initialize(InitializeOptions {
-    //             load_all_models: true,
-    //             acceleration_mode: AccelerationMode::Cpu,
-    //             ..Default::default()
-    //         })
-    //         .unwrap();
-
-    //     // 「テスト」という文章に対応する入力
-    //     let phoneme_vector = [0, 37, 14, 35, 6, 37, 30, 0];
-    //     let note_vector = [0, 30, 30, 40, 40, 50, 50, 0];
-
-    //     let sing_teacher_speaker_id = 6000;
-    //     let result = internal.lock().unwrap().predict_sing_f0(
-    //         &phoneme_vector,
-    //         &note_vector,
-    //         sing_teacher_speaker_id,
-    //     );
-
-    //     assert!(result.is_ok(), "{result:?}");
-    //     assert_eq!(result.unwrap().len(), phoneme_vector.len());
-    // }
-
-    // #[rstest]
-    // fn predict_sing_volume_works() {
-    //     let internal = VoicevoxCore::new_with_mutex();
-    //     internal
-    //         .lock()
-    //         .unwrap()
-    //         .initialize(InitializeOptions {
-    //             load_all_models: true,
-    //             acceleration_mode: AccelerationMode::Cpu,
-    //             ..Default::default()
-    //         })
-    //         .unwrap();
-
-    //     // 「テスト」という文章に対応する入力
-    //     let phoneme_vector = [0, 37, 14, 35, 6, 37, 30, 0];
-    //     let note_vector = [0, 30, 30, 40, 40, 50, 50, 0];
-    //     let f0_vector = [0., 5.905218, 5.905218, 0., 0., 5.565851, 5.565851, 0.];
-
-    //     let sing_teacher_speaker_id = 6000;
-    //     let result = internal.lock().unwrap().predict_sing_volume(
-    //         &phoneme_vector,
-    //         &note_vector,
-    //         &f0_vector,
-    //         sing_teacher_speaker_id,
-    //     );
-
-    //     assert!(result.is_ok(), "{result:?}");
-    //     assert_eq!(result.unwrap().len(), phoneme_vector.len());
-    // }
-
-    // #[rstest]
-    // fn sf_decode_works() {
-    //     let internal = VoicevoxCore::new_with_mutex();
-    //     internal
-    //         .lock()
-    //         .unwrap()
-    //         .initialize(InitializeOptions {
-    //             acceleration_mode: AccelerationMode::Cpu,
-    //             load_all_models: true,
-    //             ..Default::default()
-    //         })
-    //         .unwrap();
-
-    //     // 「テスト」という文章に対応する入力
-    //     const F0_LENGTH: usize = 69;
-    //     let mut f0 = [0.; F0_LENGTH];
-    //     f0[9..24].fill(5.905218);
-    //     f0[37..60].fill(5.565851);
-
-    //     let mut volume = [0.; F0_LENGTH];
-    //     volume[9..24].fill(0.5);
-    //     volume[24..37].fill(0.2);
-    //     volume[37..60].fill(1.0);
-
-    //     let mut phoneme = [0; F0_LENGTH];
-    //     let mut set_one = |index, range| {
-    //         for i in range {
-    //             phoneme[i] = index;
-    //         }
-    //     };
-    //     set_one(0, 0..9);
-    //     set_one(37, 9..13);
-    //     set_one(14, 13..24);
-    //     set_one(35, 24..30);
-    //     set_one(6, 30..37);
-    //     set_one(37, 37..45);
-    //     set_one(30, 45..60);
-    //     set_one(0, 60..69);
-
-    //     let sf_decoder_speaker_id = 3000;
-    //     let result =
-    //         internal
-    //             .lock()
-    //             .unwrap()
-    //             .sf_decode(&phoneme, &f0, &volume, sf_decoder_speaker_id);
-
-    //     assert!(result.is_ok(), "{result:?}");
-    //     assert_eq!(result.unwrap().len(), F0_LENGTH * 256);
-    // }
+    #[rstest]
+    fn predict_sing_f0_works() {
+        let internal = VoicevoxCore::new_with_mutex();
+        internal
+            .lock()
+            .unwrap()
+            .initialize(InitializeOptions {
+                load_all_models: true,
+                acceleration_mode: AccelerationMode::Cpu,
+                ..Default::default()
+            })
+            .unwrap();
+
+        // Input corresponding to the text 「テスト」 ("test")
+        let phoneme_vector = [0, 37, 14, 35, 6, 37, 30, 0];
+        let note_vector = [0, 30, 30, 40, 40, 50, 50, 0];
+
+        let sing_teacher_speaker_id = 6000;
+        let result = internal.lock().unwrap().predict_sing_f0(
+            &phoneme_vector,
+            &note_vector,
+            sing_teacher_speaker_id,
+        );
+
+        assert!(result.is_ok(), "{result:?}");
+        assert_eq!(result.unwrap().len(), phoneme_vector.len());
+    }
+
+    #[rstest]
+    fn predict_sing_volume_works() {
+        let internal = VoicevoxCore::new_with_mutex();
+        internal
+            .lock()
+            .unwrap()
+            .initialize(InitializeOptions {
+                load_all_models: true,
+                acceleration_mode: AccelerationMode::Cpu,
+                ..Default::default()
+            })
+            .unwrap();
+
+        // Input corresponding to the text 「テスト」 ("test")
+        let phoneme_vector = [0, 37, 14, 35, 6, 37, 30, 0];
+        let note_vector = [0, 30, 30, 40, 40, 50, 50, 0];
+        let f0_vector = [0., 5.905218, 5.905218, 0., 0., 5.565851, 5.565851, 0.];
+
+        let sing_teacher_speaker_id = 6000;
+        let result = internal.lock().unwrap().predict_sing_volume(
+            &phoneme_vector,
+            &note_vector,
+            &f0_vector,
+            sing_teacher_speaker_id,
+        );
+
+        assert!(result.is_ok(), "{result:?}");
+        assert_eq!(result.unwrap().len(), phoneme_vector.len());
+    }
+
+    #[rstest]
+    fn sf_decode_works() {
+        let internal = VoicevoxCore::new_with_mutex();
+        internal
+            .lock()
+            .unwrap()
+            .initialize(InitializeOptions {
+                acceleration_mode: AccelerationMode::Cpu,
+                load_all_models: true,
+                ..Default::default()
+            })
+            .unwrap();
+
+        // Input corresponding to the text 「テスト」 ("test")
+        const F0_LENGTH: usize = 69;
+        let mut f0 = [0.; F0_LENGTH];
+        f0[9..24].fill(5.905218);
+        f0[37..60].fill(5.565851);
+
+        let mut volume = [0.; F0_LENGTH];
+        volume[9..24].fill(0.5);
+        volume[24..37].fill(0.2);
+        volume[37..60].fill(1.0);
+
+        let mut phoneme = [0; F0_LENGTH];
+        let mut set_one = |index, range| {
+            for i in range {
+                phoneme[i] = index;
+            }
+        };
+        set_one(0, 0..9);
+        set_one(37, 9..13);
+        set_one(14, 13..24);
+        set_one(35, 24..30);
+        set_one(6, 30..37);
+        set_one(37, 37..45);
+        set_one(30, 45..60);
+        set_one(0, 60..69);
+
+        let sf_decoder_speaker_id = 3000;
+        let result =
+            internal
+                .lock()
+                .unwrap()
+                .sf_decode(&phoneme, &f0, &volume, sf_decoder_speaker_id);
+
+        assert!(result.is_ok(), "{result:?}");
+        assert_eq!(result.unwrap().len(), F0_LENGTH * 256);
+    }
 
     type TextConsonantVowelData =
         [(&'static [(&'static str, &'static str, &'static str)], usize)];

diff --git a/crates/voicevox_core/src/status.rs b/crates/voicevox_core/src/status.rs
@@ -619,14 +619,21 @@ mod tests {
         assert!(status.talk_models.predict_duration.is_empty());
         assert!(status.talk_models.predict_intonation.is_empty());
         assert!(status.talk_models.decode.is_empty());
+        assert!(status
+            .sing_teacher_models
+            .predict_sing_consonant_length
+            .is_empty());
+        assert!(status.sing_teacher_models.predict_sing_f0.is_empty());
+        assert!(status.sing_teacher_models.predict_sing_volume.is_empty());
+        assert!(status.sf_decode_models.sf_decode.is_empty());
         assert!(status.supported_styles.is_empty());
     }
 
     #[rstest]
     fn status_load_metas_works() {
         let mut status = Status::new(true, 0);
         let result = status.load_metas();
-        assert_debug_fmt_eq!(Ok(()), result);
+        assert_eq!(Ok(()), result);
         let expected = BTreeSet::from([0, 1, 2, 3, 3000, 6000]);
         assert_eq!(expected, status.supported_styles);
     }
@@ -642,7 +649,7 @@ mod tests {
     fn status_load_talk_model_works() {
         let mut status = Status::new(false, 0);
         let result = status.load_talk_model(0);
-        assert_debug_fmt_eq!(Ok(()), result);
+        assert_eq!(Ok(()), result);
         assert_eq!(1, status.talk_models.predict_duration.len());
         assert_eq!(1, status.talk_models.predict_intonation.len());
         assert_eq!(1, status.talk_models.decode.len());
@@ -657,12 +664,66 @@ mod tests {
             "model should  not be loaded"
         );
         let result = status.load_talk_model(model_index);
-        assert_debug_fmt_eq!(Ok(()), result);
+        assert_eq!(Ok(()), result);
         assert!(
             status.is_talk_model_loaded(model_index),
             "model should be loaded"
         );
     }
 
-    // TODO: sing系のテスト足す
+    #[rstest]
+    fn status_load_sing_teacher_model_works() {
+        let mut status = Status::new(false, 0);
+        let result = status.load_sing_teacher_model(0);
+        assert_eq!(Ok(()), result);
+        assert_eq!(
+            1,
+            status
+                .sing_teacher_models
+                .predict_sing_consonant_length
+                .len()
+        );
+        assert_eq!(1, status.sing_teacher_models.predict_sing_f0.len());
+        assert_eq!(1, status.sing_teacher_models.predict_sing_volume.len());
+    }
+
+    #[rstest]
+    fn status_is_sing_teacher_model_loaded_works() {
+        let mut status = Status::new(false, 0);
+        let model_index = 0;
+        assert!(
+            !status.is_sing_teacher_model_loaded(model_index),
+            "model should  not be loaded"
+        );
+        let result = status.load_sing_teacher_model(model_index);
+        assert_eq!(Ok(()), result);
+        assert!(
+            status.is_sing_teacher_model_loaded(model_index),
+            "model should be loaded"
+        );
+    }
+
+    #[rstest]
+    fn status_load_sf_decode_model_works() {
+        let mut status = Status::new(false, 0);
+        let result = status.load_sf_decode_model(0);
+        assert_eq!(Ok(()), result);
+        assert_eq!(1, status.sf_decode_models.sf_decode.len());
+    }
+
+    #[rstest]
+    fn status_is_sf_decode_model_loaded_works() {
+        let mut status = Status::new(false, 0);
+        let model_index = 0;
+        assert!(
+            !status.is_sf_decode_model_loaded(model_index),
+            "model should  not be loaded"
+        );
+        let result = status.load_sf_decode_model(model_index);
+        assert_eq!(Ok(()), result);
+        assert!(
+            status.is_sf_decode_model_loaded(model_index),
+            "model should be loaded"
+        );
+    }
 }
diff --git a/crates/voicevox_core/src/status/model_file.rs b/crates/voicevox_core/src/status/model_file.rs
@@ -14,23 +14,18 @@ pub(super) const TALK_MODEL_FILE_NAMES: &[TalkModelFileNames] = &[TalkModelFileN
     decode_model: "decode-0.onnx",
 }];
 
-// TODO: 変更する
 pub(super) const SING_TEACHER_SPEAKER_ID_MAP: &[(u32, (usize, u32))] = &[(6000, (0, 0))];
 
 pub(super) const SING_TEACHER_MODEL_FILE_NAMES: &[SingTeacherModelFileNames] =
     &[SingTeacherModelFileNames {
-        predict_sing_consonant_length_model: "predict_duration-1.onnx",
-        predict_sing_f0_model: "predict_intonation-1.onnx",
-        predict_sing_volume_model: "predict_intonation-1.onnx",
-        // predict_sing_consonant_length_model: "predict_sing_consonant_length-0.onnx",
-        // predict_sing_f0_model: "predict_sing_f0-0.onnx",
-        // predict_sing_volume_model: "predict_sing_volume-0.onnx",
+        predict_sing_consonant_length_model: "predict_sing_consonant_length-0.onnx",
+        predict_sing_f0_model: "predict_sing_f0-0.onnx",
+        predict_sing_volume_model: "predict_sing_volume-0.onnx",
     }];
 
 pub(super) const SF_DECODE_SPEAKER_ID_MAP: &[(u32, (usize, u32))] = &[(3000, (0, 0))];
 
 pub(super) const SF_DECODE_MODEL_FILE_NAMES: &[SfDecodeModelFileNames] =
     &[SfDecodeModelFileNames {
-        sf_decode_model: "decode-1.onnx",
-        // sf_decode_model: "sf_decoder-0.onnx",
+        sf_decode_model: "sf_decode-0.onnx",
     }];
diff --git a/model/predict_sing_consonant_length-0.onnx b/model/predict_sing_consonant_length-0.onnx
diff --git a/model/predict_sing_f0-0.onnx b/model/predict_sing_f0-0.onnx
diff --git a/model/predict_sing_volume-0.onnx b/model/predict_sing_volume-0.onnx
diff --git a/model/sf_decode-0.onnx b/model/sf_decode-0.onnx