Skip to content

Commit

Permalink
スペクトログラムをndarrayで捌く
Browse files Browse the repository at this point in the history
  • Loading branch information
qryxip committed Jan 4, 2024
1 parent 8891bb8 commit 8904be2
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 59 deletions.
70 changes: 44 additions & 26 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ ndarray = "0.15.6"
ndarray-stats = "0.5.1"
num-traits = "0.2.15"
octocrab = { version = "0.19.0", default-features = false }
once_cell = "1.18.0"
once_cell = "1.19.0"
ouroboros = "0.18.0"
parse-display = "0.8.2"
pretty_assertions = "1.3.0"
Expand All @@ -60,7 +60,7 @@ pyo3-asyncio = "0.19.0"
pyo3-log = "0.9.0"
quote = "1.0.33"
rayon = "1.6.1"
regex = "1.10.0"
regex = "1.10.2"
reqwest = { version = "0.11.13", default-features = false }
rstest = "0.15.0"
serde = "1.0.145"
Expand Down Expand Up @@ -99,7 +99,7 @@ rev = "de226a26e8e18edbdb1d6f986afe37bbbf35fbf4"

[workspace.dependencies.world]
git = "https://github.com/White-Green/WORLD_rs.git"
rev = "2337a30bfa47eebd32ef418c60ae5c7b39e43b99"
rev = "37c0d11691afd42e37c627a2a964459c9eaf77b3"

[workspace.package]
version = "0.0.0"
Expand Down
2 changes: 1 addition & 1 deletion crates/voicevox_core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ tokio = { workspace = true, features = ["rt"] } # FIXME: feature-gateする
tracing.workspace = true
uuid = { workspace = true, features = ["v4", "serde"] }
voicevox_core_macros = { path = "../voicevox_core_macros" }
world.workspace = true
world = { workspace = true, features = ["ndarray"] }
zip.workspace = true

[dev-dependencies]
Expand Down
87 changes: 58 additions & 29 deletions crates/voicevox_core/src/engine/morph.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use std::collections::BTreeMap;
use std::{collections::BTreeMap, iter};

use easy_ext::ext;
use ndarray::{Array, Array2};
use num_traits::Zero;
use world::{
signal_analyzer::{AnalyzeResult, SignalAnalyzerBuilder},
spectrogram_like::SpectrogramLike,
Expand Down Expand Up @@ -73,34 +76,20 @@ impl<'metas> MorphableStyles<'metas> {
synthesizer.synthesis_wave(audio_query, style_id, &Default::default())
})?;

let morph_param = MorphingParameter::new(waves);

let mut morph_spectrogram = SpectrogramLike::<f64>::new(
morph_param.base_spectrogram.time_axis_size(),
morph_param.base_spectrogram.frequency_axis_size(),
);

// FIXME: サイズ違いの場合は"resize"する
for (morph_spectrogram, (base_spectrogram, target_spectrogram)) in itertools::zip_eq(
morph_spectrogram.lines_mut(),
itertools::zip_eq(
morph_param.base_spectrogram.lines(),
morph_param.target_spectrogram.lines(),
),
) {
for (morph_spectrogram, (base_spectrogram, target_spectrogram)) in itertools::zip_eq(
morph_spectrogram,
itertools::zip_eq(base_spectrogram, target_spectrogram),
) {
*morph_spectrogram =
base_spectrogram * (1. - morph_rate) + target_spectrogram * morph_rate;
}
}
let MorphingParameter {
base_f0,
base_aperiodicity,
base_spectrogram,
target_spectrogram,
} = &MorphingParameter::new(waves);

let morph_spectrogram =
&(base_spectrogram * (1. - morph_rate) + target_spectrogram * morph_rate).into();

let wave = &world::synthesis::synthesis(
&morph_param.base_f0,
&morph_spectrogram,
&morph_param.base_aperiodicity,
base_f0,
morph_spectrogram,
base_aperiodicity,
None,
FRAME_PERIOD,
DEFAULT_SAMPLING_RATE,
Expand All @@ -118,15 +107,19 @@ impl<'metas> MorphableStyles<'metas> {
struct MorphingParameter {
base_f0: Box<[f64]>,
base_aperiodicity: SpectrogramLike<f64>,
base_spectrogram: SpectrogramLike<f64>,
target_spectrogram: SpectrogramLike<f64>,
base_spectrogram: Array2<f64>,
target_spectrogram: Array2<f64>,
}

impl MorphingParameter {
fn new(wave: &MorphingPair<Vec<f32>>) -> Self {
let (base_f0, base_spectrogram, base_aperiodicity) = analyze(&wave.base);
let (_, target_spectrogram, _) = analyze(&wave.target);

let base_spectrogram = Array::from(base_spectrogram);
let target_spectrogram =
Array::from(target_spectrogram).resize(base_spectrogram.dim());

Self {
base_f0,
base_aperiodicity,
Expand Down Expand Up @@ -199,6 +192,23 @@ impl MorphingPair<StyleId> {
}
}

#[ext(Array2Ext)]
impl<T: Zero + Copy> Array2<T> {
    /// Returns this array with its shape changed to `nrows` x `ncols`.
    ///
    /// Elements whose index exists in both the old and the new shape keep their
    /// value. Positions that exist only in the new shape are filled with
    /// `T::zero()`, and rows/columns that fall outside the new shape are dropped.
    /// When the shape already matches, `self` is handed back as is.
    fn resize(self, (nrows, ncols): (usize, usize)) -> Self {
        if self.dim() == (nrows, ncols) {
            return self;
        }

        // `get` yields `None` for indices outside the source array, which are
        // exactly the positions that must be zero-padded.
        Array2::from_shape_fn((nrows, ncols), |index| {
            self.get(index).copied().unwrap_or_else(T::zero)
        })
    }
}

mod permit {
use std::marker::PhantomData;

Expand Down Expand Up @@ -261,3 +271,22 @@ mod permit {
}
}
}

#[cfg(test)]
mod tests {
    use ndarray::{array, Array2};
    use rstest::rstest;

    use super::Array2Ext as _;

    /// `resize` must zero-pad when the target shape is larger (first case) and
    /// crop when the target shape is smaller (second case).
    #[rstest]
    #[case(array![[1]], (2, 2), array![[1, 0], [0, 0]])]
    #[case(array![[1, 1], [1, 1]], (1, 1), array![[1]])]
    fn resize_works(
        #[case] input: Array2<i32>,
        #[case] shape: (usize, usize),
        #[case] expected: Array2<i32>,
    ) {
        let actual = input.resize(shape);
        pretty_assertions::assert_eq!(expected, actual);
    }
}

0 comments on commit 8904be2

Please sign in to comment.