Skip to content

Commit

Permalink
feat: pause_length{,_scale}をデフォルト値限定で受け入れる
Browse files Browse the repository at this point in the history
VOICEVOX/voicevox_engine#1308 と VOICEVOX/voicevox_engine#1425 の一部
を参考にコードを書いた。

TODO: @X-20A さんの許諾を取るか、"hihoライセンス"経由で取り込む旨を書く

Co-Authored-By: X-20A <[email protected]>
Co-Authored-By: sabonerune <[email protected]>
Refs: #874 (comment)
  • Loading branch information
3 people committed Nov 20, 2024
1 parent 918f226 commit df00650
Show file tree
Hide file tree
Showing 4 changed files with 160 additions and 1 deletion.
140 changes: 139 additions & 1 deletion crates/voicevox_core/src/engine/model.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use serde::{Deserialize, Serialize};
use std::fmt;

use duplicate::duplicate_item;
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};

/* 各フィールドのjsonフィールド名はsnake_caseとする*/

Expand Down Expand Up @@ -64,6 +67,20 @@ pub struct AudioQuery {
pub output_sampling_rate: u32,
/// 音声データをステレオ出力するか否か。
pub output_stereo: bool,
/// 句読点などの無音時間。`null`のときは無視される。デフォルト値は`null`。
#[serde(
default,
deserialize_with = "deserialize_pause_length",
serialize_with = "serialize_pause_length"
)]
pub pause_length: (),
/// 読点などの無音時間(倍率)。デフォルト値は`1`。
#[serde(
default,
deserialize_with = "deserialize_pause_length_scale",
serialize_with = "serialize_pause_length_scale"
)]
pub pause_length_scale: (),
/// \[読み取り専用\] AquesTalk風記法。
///
/// [`Synthesizer::audio_query`]が返すもののみ`Some`となる。入力としてのAudioQueryでは無視され
Expand All @@ -73,6 +90,87 @@ pub struct AudioQuery {
pub kana: Option<String>,
}

/// `deserialize_with` helper for the `pause_length` field.
///
/// Only the unit value (the visitor describes what it expects as `` `null` ``) is accepted
/// and mapped to `()`. Integer and floating-point inputs are rejected with a dedicated
/// message; any other kind of input is left to the default [`de::Visitor`] methods.
fn deserialize_pause_length<'de, D>(deserializer: D) -> Result<(), D::Error>
where
    D: Deserializer<'de>,
{
    /// Visitor that succeeds only on a unit value.
    struct NullOnly;

    impl<'de> de::Visitor<'de> for NullOnly {
        type Value = ();

        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str("`null`")
        }

        fn visit_unit<E>(self) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            Ok(())
        }

        // Every numeric input is refused with the same explanatory message.
        #[duplicate_item(
            method T;
            [ visit_i64 ] [ i64 ];
            [ visit_u64 ] [ u64 ];
            [ visit_f64 ] [ f64 ];
        )]
        fn method<E>(self, _: T) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            Err(E::custom("currently `pause_length` must be `null`"))
        }
    }

    deserializer.deserialize_any(NullOnly)
}

fn serialize_pause_length<S>(_: &(), serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_unit()
}

/// `deserialize_with` helper for the `pause_length_scale` field.
///
/// A signed integer, unsigned integer, or float is accepted only when it equals one, and is
/// mapped to `()`. Any other number is rejected with a dedicated message; non-numeric input
/// is left to the default [`de::Visitor`] methods.
fn deserialize_pause_length_scale<'de, D>(deserializer: D) -> Result<(), D::Error>
where
    D: Deserializer<'de>,
{
    /// Visitor that succeeds only on a numeric value equal to one.
    struct OneOnly;

    impl<'de> de::Visitor<'de> for OneOnly {
        type Value = ();

        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str("`1.`")
        }

        // One method per numeric representation, each compared against its own literal `1`.
        #[duplicate_item(
            method T ONE;
            [ visit_i64 ] [ i64 ] [ 1 ];
            [ visit_u64 ] [ u64 ] [ 1 ];
            [ visit_f64 ] [ f64 ] [ 1. ];
        )]
        fn method<E>(self, value: T) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            if value == ONE {
                Ok(())
            } else {
                Err(E::custom("currently `pause_length_scale` must be `1.`"))
            }
        }
    }

    deserializer.deserialize_any(OneOnly)
}

fn serialize_pause_length_scale<S>(_: &(), serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
(1.).serialize(serializer)
}

impl AudioQuery {
pub(crate) fn with_kana(self, kana: Option<String>) -> Self {
Self { kana, ..self }
Expand All @@ -99,6 +197,8 @@ mod tests {
post_phoneme_length: 0.0,
output_sampling_rate: 0,
output_stereo: false,
pause_length: (),
pause_length_scale: (),
kana: None,
};
let val = serde_json::to_value(audio_query_model).unwrap();
Expand Down Expand Up @@ -152,4 +252,42 @@ mod tests {
}))?;
Ok(())
}

// TODO: remove this test once the type system makes it trivially true
#[rstest]
fn it_denies_non_null_for_pause_length() {
    // A string is supplied where only `null` is currently accepted.
    let input = json!({
        "accent_phrases": [],
        "speed_scale": 1.0,
        "pitch_scale": 0.0,
        "intonation_scale": 1.0,
        "volume_scale": 1.0,
        "pre_phoneme_length": 0.1,
        "post_phoneme_length": 0.1,
        "output_sampling_rate": 24000,
        "output_stereo": false,
        "pause_length": "aaaaa"
    });

    let outcome = serde_json::from_value::<AudioQuery>(input);

    // Discard the success value before asserting that deserialization failed.
    outcome.map(drop).unwrap_err();
}

// TODO: remove this test once the type system makes it trivially true
#[rstest]
fn it_denies_non_float_for_pause_length_scale() {
    // A string is supplied where only the number one is currently accepted.
    let input = json!({
        "accent_phrases": [],
        "speed_scale": 1.0,
        "pitch_scale": 0.0,
        "intonation_scale": 1.0,
        "volume_scale": 1.0,
        "pre_phoneme_length": 0.1,
        "post_phoneme_length": 0.1,
        "output_sampling_rate": 24000,
        "output_stereo": false,
        "pause_length_scale": "aaaaa"
    });

    let outcome = serde_json::from_value::<AudioQuery>(input);

    // Discard the success value before asserting that deserialization failed.
    outcome.map(drop).unwrap_err();
}
}
2 changes: 2 additions & 0 deletions crates/voicevox_core/src/synthesizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1185,6 +1185,8 @@ mod inner {
post_phoneme_length: 0.1,
output_sampling_rate: DEFAULT_SAMPLING_RATE,
output_stereo: false,
pause_length: (),
pause_length_scale: (),
kana: Some(kana),
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,17 @@ public class AudioQuery {
@Expose
public boolean outputStereo;

/** 句読点などの無音時間。{@code null}のときは無視される。デフォルト値は{@code null}。 */
@SerializedName("pause_length")
@Expose
@Nullable
public Double pauseLength;

/** 読点などの無音時間(倍率)。デフォルト値は{@code 1.}。 */
@SerializedName("pause_length_scale")
@Expose
public double pauseLengthScale;

/**
* [読み取り専用] AquesTalk風記法。
*
Expand All @@ -75,6 +86,8 @@ public AudioQuery() {
this.prePhonemeLength = 0.1;
this.postPhonemeLength = 0.1;
this.outputSamplingRate = 24000;
this.pauseLength = null;
this.pauseLengthScale = 1.0;
this.kana = null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,12 @@ class AudioQuery:
output_stereo: bool
"""音声データをステレオ出力するか否か。"""

pause_length: None = None
"""句読点などの無音時間。 ``None`` のときは無視される。デフォルト値は ``None`` 。"""

pause_length_scale: float = 1.0
"""読点などの無音時間(倍率)。デフォルト値は ``1.0`` 。"""

kana: Optional[str] = None
"""
[読み取り専用] AquesTalk風記法。
Expand Down

0 comments on commit df00650

Please sign in to comment.