move wav_from_s16le to top module

VOICEVOX · Oct 18, 2024 · f2bcf65
1 parent 6db7b87
commit f2bcf65
Show file tree

Hide file tree

Showing 8 changed files with 30 additions and 58 deletions.
diff --git a/crates/voicevox_core/src/blocking.rs b/crates/voicevox_core/src/blocking.rs
@@ -2,9 +2,8 @@
 
 pub use crate::{
     engine::open_jtalk::blocking::OpenJtalk, infer::runtimes::onnxruntime::blocking::Onnxruntime,
-    synthesizer::blocking::wav_from_s16le, synthesizer::blocking::Audio,
-    synthesizer::blocking::Synthesizer, user_dict::dict::blocking::UserDict,
-    voice_model::blocking::VoiceModelFile,
+    synthesizer::blocking::Audio, synthesizer::blocking::Synthesizer,
+    user_dict::dict::blocking::UserDict, voice_model::blocking::VoiceModelFile,
 };
 
 pub mod onnxruntime {

diff --git a/crates/voicevox_core/src/engine/mod.rs b/crates/voicevox_core/src/engine/mod.rs
@@ -1,11 +1,13 @@
 mod acoustic_feature_extractor;
+mod audio_file;
 mod full_context_label;
 mod kana_parser;
 mod model;
 mod mora_list;
 pub(crate) mod open_jtalk;
 
 pub(crate) use self::acoustic_feature_extractor::OjtPhoneme;
+pub use self::audio_file::wav_from_s16le;
 pub(crate) use self::full_context_label::{
     extract_full_context_label, mora_to_text, FullContextLabelError,
 };

diff --git a/crates/voicevox_core/src/lib.rs b/crates/voicevox_core/src/lib.rs
@@ -83,7 +83,7 @@ use rstest_reuse;
 
 pub use self::{
     devices::SupportedDevices,
-    engine::{AccentPhrase, AudioQuery, FullcontextExtractor, Mora},
+    engine::{wav_from_s16le, AccentPhrase, AudioQuery, FullcontextExtractor, Mora},
     error::{Error, ErrorKind},
     metas::{
         RawStyleId, RawStyleVersion, SpeakerMeta, StyleId, StyleMeta, StyleType, StyleVersion,

diff --git a/crates/voicevox_core/src/synthesizer.rs b/crates/voicevox_core/src/synthesizer.rs
@@ -80,47 +80,13 @@ pub struct InitializeOptions {
 }
 
 pub(crate) mod blocking {
-    use std::io::{Cursor, Write as _};
-
-    /// 16bit PCMにヘッダを付加しWAVフォーマットのバイナリを生成する。
-    pub fn wav_from_s16le(pcm: &[u8], output_sampling_rate: u32, output_stereo: bool) -> Vec<u8> {
-        // TODO: 44.1kHzなどの対応
-
-        let num_channels: u16 = if output_stereo { 2 } else { 1 };
-        let bit_depth: u16 = 16;
-        let block_size: u16 = bit_depth * num_channels / 8;
-
-        let bytes_size = pcm.len() as u32;
-        let wave_size = bytes_size + 44;
-
-        let buf: Vec<u8> = Vec::with_capacity(wave_size as usize);
-        let mut cur = Cursor::new(buf);
-
-        cur.write_all("RIFF".as_bytes()).unwrap();
-        cur.write_all(&(wave_size - 8).to_le_bytes()).unwrap();
-        cur.write_all("WAVEfmt ".as_bytes()).unwrap();
-        cur.write_all(&16_u32.to_le_bytes()).unwrap(); // fmt header length
-        cur.write_all(&1_u16.to_le_bytes()).unwrap(); //linear PCM
-        cur.write_all(&num_channels.to_le_bytes()).unwrap();
-        cur.write_all(&output_sampling_rate.to_le_bytes()).unwrap();
-
-        let block_rate = output_sampling_rate * block_size as u32;
-
-        cur.write_all(&block_rate.to_le_bytes()).unwrap();
-        cur.write_all(&block_size.to_le_bytes()).unwrap();
-        cur.write_all(&bit_depth.to_le_bytes()).unwrap();
-        cur.write_all("data".as_bytes()).unwrap();
-        cur.write_all(&bytes_size.to_le_bytes()).unwrap();
-        cur.write_all(&pcm).unwrap();
-        cur.into_inner()
-    }
-
     use enum_map::enum_map;
+    use std::io::{Cursor, Write as _};
     use tracing::info;
 
     use crate::{
         devices::{DeviceSpec, GpuSpec},
-        engine::{create_kana, mora_to_text, Mora, OjtPhoneme},
+        engine::{create_kana, mora_to_text, wav_from_s16le, Mora, OjtPhoneme},
         error::ErrorRepr,
         infer::{
             domains::{

diff --git a/crates/voicevox_core_python_api/python/voicevox_core/__init__.py b/crates/voicevox_core_python_api/python/voicevox_core/__init__.py
@@ -35,12 +35,14 @@
     UseUserDictError,
     WordNotFoundError,
     __version__,
+    wav_from_s16le,
 )
 
 from . import asyncio, blocking  # noqa: F401 isort: skip
 
 __all__ = [
     "__version__",
+    "wav_from_s16le",
     "AccelerationMode",
     "AccentPhrase",
     "AudioQuery",

diff --git a/crates/voicevox_core_python_api/python/voicevox_core/_rust/__init__.pyi b/crates/voicevox_core_python_api/python/voicevox_core/_rust/__init__.pyi
@@ -102,3 +102,22 @@ class InvalidWordError(ValueError):
 
 def _validate_pronunciation(pronunciation: str) -> None: ...
 def _to_zenkaku(text: str) -> str: ...
+def wav_from_s16le(pcm: bytes, output_sampling_rate: int, output_stereo: bool) -> bytes:
+    """
+    16bit PCMにヘッダを付加しWAVフォーマットのバイナリを生成する。
+
+    Parameters
+    ----------
+    pcm : bytes
+        16bit PCMで表現された音声データ
+    output_sampling_rate: int
+        pcmのサンプリングレート
+    output_stereo: bool
+        pcmがステレオかどうか
+
+    Returns
+    -------
+    bytes
+        WAVフォーマットで表現された音声データ
+    """
+    ...
diff --git a/crates/voicevox_core_python_api/src/lib.rs b/crates/voicevox_core_python_api/src/lib.rs
@@ -264,21 +264,6 @@ fn _to_zenkaku(text: &str) -> PyResult<String> {
     Ok(voicevox_core::__internal::to_zenkaku(text))
 }
 
-/// 16bit PCMにヘッダを付加しWAVフォーマットのバイナリを生成する。
-///
-/// Parameters
-/// ----------
-/// pcm : bytes
-///     16bit PCMで表現された音声データ
-/// output_sampling_rate: int
-///     pcmのサンプリングレート
-/// output_stereo: bool
-///     pcmがステレオかどうか
-///
-/// Returns
-/// -------
-/// bytes
-///     WAVフォーマットで表現された音声データ
 #[pyfunction]
 fn wav_from_s16le<'py>(
     pcm: &[u8],
@@ -288,7 +273,7 @@ fn wav_from_s16le<'py>(
 ) -> &'py PyBytes {
     PyBytes::new(
         py,
-        &voicevox_core::blocking::wav_from_s16le(pcm, output_sampling_rate, output_stereo),
+        &voicevox_core::wav_from_s16le(pcm, output_sampling_rate, output_stereo),
     )
 }
 

diff --git a/example/python/run.py b/example/python/run.py
@@ -5,8 +5,7 @@
 from pathlib import Path
 from typing import Tuple
 
-from voicevox_core import AccelerationMode, AudioQuery
-from voicevox_core._rust import wav_from_s16le
+from voicevox_core import AccelerationMode, AudioQuery, wav_from_s16le
 from voicevox_core.blocking import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile