Skip to content

Commit

Permalink
無音を無視するように変更 (Change to ignore silence)
Browse files: browse the repository at this point in the history
  • Loading branch information
Hiroshiba committed Mar 10, 2018
1 parent ef2be3c commit f8823b1
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 8 deletions.
3 changes: 3 additions & 0 deletions become_yukarin/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,6 @@ def backward_compatible(d: Dict):
d['model']['generator_extensive_layers'] = 8
d['model']['discriminator_base_channels'] = 32
d['model']['discriminator_extensive_layers'] = 5

if 'weak_discriminator' not in d['model']:
d['model']['weak_discriminator'] = False
9 changes: 5 additions & 4 deletions become_yukarin/voice_changer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import numpy

from become_yukarin.param import Param
from .acoustic_converter import AcousticConverter
from .data_struct import AcousticFeature
from .data_struct import Wave
Expand Down Expand Up @@ -78,10 +79,12 @@ def __init__(
self,
sampling_rate: int,
frame_period: float,
order: int,
in_dtype=numpy.float32,
):
self.sampling_rate = sampling_rate
self.frame_period = frame_period
self.order = order
self.in_dtype = in_dtype

self.voice_changer: VoiceChanger = None
Expand Down Expand Up @@ -189,8 +192,7 @@ def pre_convert(self, start_time: float, time_length: float, extra_time: float):
return in_feature

def convert(self, start_time: float, time_length: float, extra_time: float):
order = self.voice_changer.acoustic_converter.config.dataset.param.acoustic_feature_param.order
sizes = AcousticFeature.get_sizes(sampling_rate=self.sampling_rate, order=order)
sizes = AcousticFeature.get_sizes(sampling_rate=self.sampling_rate, order=self.order)
keys = ['f0', 'aperiodicity', 'mfcc', 'voiced']
in_feature = self.fetch(
start_time=start_time,
Expand All @@ -209,8 +211,7 @@ def convert(self, start_time: float, time_length: float, extra_time: float):
return out_feature

def post_convert(self, start_time: float, time_length: float):
order = self.voice_changer.acoustic_converter.config.dataset.param.acoustic_feature_param.order
sizes = AcousticFeature.get_sizes(sampling_rate=self.sampling_rate, order=order)
sizes = AcousticFeature.get_sizes(sampling_rate=self.sampling_rate, order=self.order)
keys = ['f0', 'aperiodicity', 'spectrogram', 'voiced']
out_feature = self.fetch(
start_time=start_time,
Expand Down
12 changes: 8 additions & 4 deletions scripts/realtime_voice_changer.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import librosa
import world4py

world4py._WORLD_LIBRARY_PATH = 'x64_world.dll'

from functools import partial
from pathlib import Path
import signal
import time
from typing import NamedTuple
from multiprocessing import Queue
from multiprocessing import Process
Expand Down Expand Up @@ -34,6 +32,7 @@ class AudioConfig(NamedTuple):
convert_chunk: int
vocoder_buffer_size: int
out_norm: float
silent_threshold: float


def encode_worker(
Expand Down Expand Up @@ -123,7 +122,10 @@ def decode_worker(
wave_fragment = numpy.concatenate([wave_fragment, wave])
if len(wave_fragment) >= audio_config.audio_chunk:
wave, wave_fragment = wave_fragment[:audio_config.audio_chunk], wave_fragment[audio_config.audio_chunk:]
queue_output.put(wave)

power = librosa.core.power_to_db(numpy.abs(librosa.stft(wave)) ** 2).mean()
if power >= audio_config.silent_threshold:
queue_output.put(wave)


def main():
Expand Down Expand Up @@ -154,11 +156,13 @@ def main():
convert_chunk=config.dataset.param.voice_param.sample_rate,
vocoder_buffer_size=config.dataset.param.voice_param.sample_rate // 16,
out_norm=2.5,
silent_threshold=-99.0,
)

voice_changer_stream = VoiceChangerStream(
sampling_rate=audio_config.rate,
frame_period=config.dataset.param.acoustic_feature_param.frame_period,
order=config.dataset.param.acoustic_feature_param.order,
in_dtype=numpy.float32,
)

Expand Down
1 change: 1 addition & 0 deletions tests/test_voice_changer.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class AudioConfig(NamedTuple):
voice_changer_stream = VoiceChangerStream(
sampling_rate=audio_config.rate,
frame_period=acoustic_converter._param.acoustic_feature_param.frame_period,
order=acoustic_converter._param.acoustic_feature_param.order,
in_dtype=numpy.float32,
)

Expand Down

0 comments on commit f8823b1

Please sign in to comment.