Skip to content

Commit

Permalink
move wav_from_s16le to top module
Browse files Browse the repository at this point in the history
  • Loading branch information
Yosshi999 committed Oct 18, 2024
1 parent 6db7b87 commit f2bcf65
Show file tree
Hide file tree
Showing 8 changed files with 30 additions and 58 deletions.
5 changes: 2 additions & 3 deletions crates/voicevox_core/src/blocking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
pub use crate::{
engine::open_jtalk::blocking::OpenJtalk, infer::runtimes::onnxruntime::blocking::Onnxruntime,
synthesizer::blocking::wav_from_s16le, synthesizer::blocking::Audio,
synthesizer::blocking::Synthesizer, user_dict::dict::blocking::UserDict,
voice_model::blocking::VoiceModelFile,
synthesizer::blocking::Audio, synthesizer::blocking::Synthesizer,
user_dict::dict::blocking::UserDict, voice_model::blocking::VoiceModelFile,
};

pub mod onnxruntime {
Expand Down
2 changes: 2 additions & 0 deletions crates/voicevox_core/src/engine/mod.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
mod acoustic_feature_extractor;
mod audio_file;
mod full_context_label;
mod kana_parser;
mod model;
mod mora_list;
pub(crate) mod open_jtalk;

pub(crate) use self::acoustic_feature_extractor::OjtPhoneme;
pub use self::audio_file::wav_from_s16le;
pub(crate) use self::full_context_label::{
extract_full_context_label, mora_to_text, FullContextLabelError,
};
Expand Down
2 changes: 1 addition & 1 deletion crates/voicevox_core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ use rstest_reuse;

pub use self::{
devices::SupportedDevices,
engine::{AccentPhrase, AudioQuery, FullcontextExtractor, Mora},
engine::{wav_from_s16le, AccentPhrase, AudioQuery, FullcontextExtractor, Mora},
error::{Error, ErrorKind},
metas::{
RawStyleId, RawStyleVersion, SpeakerMeta, StyleId, StyleMeta, StyleType, StyleVersion,
Expand Down
38 changes: 2 additions & 36 deletions crates/voicevox_core/src/synthesizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,47 +80,13 @@ pub struct InitializeOptions {
}

pub(crate) mod blocking {
use std::io::{Cursor, Write as _};

/// Prepends a 44-byte RIFF/WAVE header to 16-bit PCM data and returns the WAV binary.
///
/// - `pcm`: raw sample bytes; they are copied verbatim after the header.
/// - `output_sampling_rate`: sampling rate written into the header.
/// - `output_stereo`: `true` writes a channel count of 2, `false` writes 1.
///
/// The header stores sizes as 32-bit integers. `pcm.len()` is narrowed with an `as u32`
/// cast, so inputs whose length (plus the header) does not fit in a `u32` are not
/// supported and would yield incorrect size fields.
pub fn wav_from_s16le(pcm: &[u8], output_sampling_rate: u32, output_stereo: bool) -> Vec<u8> {
    // TODO: support 44.1kHz etc.

    /// Total size in bytes of the RIFF, `fmt ` and `data` headers written below.
    const HEADER_SIZE: u32 = 44;
    /// Bits per sample.
    const BIT_DEPTH: u16 = 16;
    /// Format tag for linear PCM.
    const FORMAT_LINEAR_PCM: u16 = 1;
    /// Length of the `fmt ` chunk payload.
    const FMT_CHUNK_SIZE: u32 = 16;

    let num_channels: u16 = if output_stereo { 2 } else { 1 };
    // Bytes per sample frame (all channels together).
    let block_size: u16 = BIT_DEPTH * num_channels / 8;
    // Bytes per second.
    let block_rate: u32 = output_sampling_rate * u32::from(block_size);

    // NOTE: truncating cast; see the size limitation in the doc comment.
    let bytes_size = pcm.len() as u32;
    let wave_size = bytes_size + HEADER_SIZE;

    // Appending to a `Vec` cannot fail, so no `io::Write` plumbing or `unwrap` is needed.
    let mut wav: Vec<u8> = Vec::with_capacity(wave_size as usize);

    wav.extend_from_slice(b"RIFF");
    // The RIFF chunk size excludes the 8 bytes of the "RIFF" tag and this field itself.
    wav.extend_from_slice(&(wave_size - 8).to_le_bytes());
    wav.extend_from_slice(b"WAVEfmt ");
    wav.extend_from_slice(&FMT_CHUNK_SIZE.to_le_bytes());
    wav.extend_from_slice(&FORMAT_LINEAR_PCM.to_le_bytes());
    wav.extend_from_slice(&num_channels.to_le_bytes());
    wav.extend_from_slice(&output_sampling_rate.to_le_bytes());
    wav.extend_from_slice(&block_rate.to_le_bytes());
    wav.extend_from_slice(&block_size.to_le_bytes());
    wav.extend_from_slice(&BIT_DEPTH.to_le_bytes());
    wav.extend_from_slice(b"data");
    wav.extend_from_slice(&bytes_size.to_le_bytes());
    wav.extend_from_slice(pcm);

    wav
}

use enum_map::enum_map;
use std::io::{Cursor, Write as _};
use tracing::info;

use crate::{
devices::{DeviceSpec, GpuSpec},
engine::{create_kana, mora_to_text, Mora, OjtPhoneme},
engine::{create_kana, mora_to_text, wav_from_s16le, Mora, OjtPhoneme},
error::ErrorRepr,
infer::{
domains::{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,14 @@
UseUserDictError,
WordNotFoundError,
__version__,
wav_from_s16le,
)

from . import asyncio, blocking # noqa: F401 isort: skip

__all__ = [
"__version__",
"wav_from_s16le",
"AccelerationMode",
"AccentPhrase",
"AudioQuery",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,3 +102,22 @@ class InvalidWordError(ValueError):

def _validate_pronunciation(pronunciation: str) -> None: ...
def _to_zenkaku(text: str) -> str: ...
def wav_from_s16le(pcm: bytes, output_sampling_rate: int, output_stereo: bool) -> bytes:
    """
    Prepend a header to 16-bit PCM data to produce a WAV-format binary.

    Parameters
    ----------
    pcm : bytes
        Audio data represented as 16-bit PCM.
    output_sampling_rate: int
        Sampling rate of ``pcm``.
    output_stereo: bool
        Whether ``pcm`` is stereo.

    Returns
    -------
    bytes
        Audio data represented in WAV format.
    """
    ...
17 changes: 1 addition & 16 deletions crates/voicevox_core_python_api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,21 +264,6 @@ fn _to_zenkaku(text: &str) -> PyResult<String> {
Ok(voicevox_core::__internal::to_zenkaku(text))
}

/// 16bit PCMにヘッダを付加しWAVフォーマットのバイナリを生成する。
///
/// Parameters
/// ----------
/// pcm : bytes
/// 16bit PCMで表現された音声データ
/// output_sampling_rate: int
/// pcmのサンプリングレート
/// output_stereo: bool
/// pcmがステレオかどうか
///
/// Returns
/// -------
/// bytes
/// WAVフォーマットで表現された音声データ
#[pyfunction]
fn wav_from_s16le<'py>(
pcm: &[u8],
Expand All @@ -288,7 +273,7 @@ fn wav_from_s16le<'py>(
) -> &'py PyBytes {
PyBytes::new(
py,
&voicevox_core::blocking::wav_from_s16le(pcm, output_sampling_rate, output_stereo),
&voicevox_core::wav_from_s16le(pcm, output_sampling_rate, output_stereo),
)
}

Expand Down
3 changes: 1 addition & 2 deletions example/python/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@
from pathlib import Path
from typing import Tuple

from voicevox_core import AccelerationMode, AudioQuery
from voicevox_core._rust import wav_from_s16le
from voicevox_core import AccelerationMode, AudioQuery, wav_from_s16le
from voicevox_core.blocking import Onnxruntime, OpenJtalk, Synthesizer, VoiceModelFile


Expand Down

0 comments on commit f2bcf65

Please sign in to comment.