
Commit

Split out the real-time functionality
Hiroshiba committed Mar 11, 2018
1 parent f8823b1 commit 210e822
Showing 3 changed files with 0 additions and 607 deletions.
252 changes: 0 additions & 252 deletions become_yukarin/voice_changer.py
@@ -1,15 +1,8 @@
from abc import ABCMeta, abstractproperty, abstractmethod
from typing import List, Callable, Any
from typing import NamedTuple

import numpy

from become_yukarin.param import Param
from .acoustic_converter import AcousticConverter
from .data_struct import AcousticFeature
from .data_struct import Wave
from .super_resolution import SuperResolution
from .vocoder import Vocoder


class VoiceChanger(object):
@@ -31,248 +24,3 @@ def convert_from_acoustic_feature(self, f_in: AcousticFeature):
s_high = self.super_resolution.convert(f_low.spectrogram.astype(numpy.float32))
f_high = self.super_resolution.convert_to_feature(s_high, f_low)
return f_high


class BaseSegment(ABCMeta):
start_time: float

@property
@abstractmethod
def time_length(self) -> float:
pass

@property
@abstractmethod
def end_time(self) -> float:
pass


class FeatureSegment(NamedTuple, BaseSegment):
start_time: float
feature: AcousticFeature
frame_period: float

@property
def time_length(self):
return len(self.feature.f0) * self.frame_period / 1000

@property
def end_time(self):
return self.time_length + self.start_time


class WaveSegment(NamedTuple, BaseSegment):
start_time: float
wave: Wave

@property
def time_length(self):
return len(self.wave.wave) / self.wave.sampling_rate

@property
def end_time(self):
return self.time_length + self.start_time


class VoiceChangerStream(object):
def __init__(
self,
sampling_rate: int,
frame_period: float,
order: int,
in_dtype=numpy.float32,
):
self.sampling_rate = sampling_rate
self.frame_period = frame_period
self.order = order
self.in_dtype = in_dtype

self.voice_changer: VoiceChanger = None
self.vocoder: Vocoder = None
self._data_stream = [] # type: List[WaveSegment]
self._in_feature_stream = [] # type: List[FeatureSegment]
self._out_feature_stream = [] # type: List[FeatureSegment]

def add_wave(self, start_time: float, wave: Wave):
# validation
assert wave.sampling_rate == self.sampling_rate
assert wave.wave.dtype == self.in_dtype

segment = WaveSegment(start_time=start_time, wave=wave)
self._data_stream.append(segment)

def add_in_feature(self, start_time: float, feature: AcousticFeature, frame_period: float):
# validation
assert frame_period == self.frame_period
assert feature.f0.dtype == self.in_dtype

segment = FeatureSegment(start_time=start_time, feature=feature, frame_period=self.frame_period)
self._in_feature_stream.append(segment)

def add_out_feature(self, start_time: float, feature: AcousticFeature, frame_period: float):
# validation
assert frame_period == self.frame_period

segment = FeatureSegment(start_time=start_time, feature=feature, frame_period=self.frame_period)
self._out_feature_stream.append(segment)

def remove(self, end_time: float):
self._data_stream = list(filter(lambda s: s.end_time > end_time, self._data_stream))
self._in_feature_stream = list(filter(lambda s: s.end_time > end_time, self._in_feature_stream))
self._out_feature_stream = list(filter(lambda s: s.end_time > end_time, self._out_feature_stream))

@staticmethod
def fetch(
start_time: float,
time_length: float,
data_stream: List[BaseSegment],
rate: float,
pad_function: Callable[[int], Any],
pick_function: Callable[[Any, int, int], Any],
concat_function: Callable[[List], Any],
extra_time: float = 0,
):
start_time -= extra_time
time_length += extra_time * 2

end_time = start_time + time_length
buffer_list = []
stream = filter(lambda s: not (end_time < s.start_time or s.end_time < start_time), data_stream)

start_time_buffer = start_time
remaining_time = time_length
for segment in stream:
# padding
if segment.start_time > start_time_buffer:
length = int((segment.start_time - start_time_buffer) * rate)
pad = pad_function(length)
buffer_list.append(pad)
start_time_buffer = segment.start_time

if remaining_time > segment.end_time - start_time_buffer:
one_time_length = segment.end_time - start_time_buffer
else:
one_time_length = remaining_time

first_index = int((start_time_buffer - segment.start_time) * rate)
last_index = int(first_index + one_time_length * rate)
one_buffer = pick_function(segment, first_index, last_index)
buffer_list.append(one_buffer)

start_time_buffer += one_time_length
remaining_time -= one_time_length

if start_time_buffer >= end_time:
break
else:
# last padding
length = int((end_time - start_time_buffer) * rate)
pad = pad_function(length)
buffer_list.append(pad)

buffer = concat_function(buffer_list)
return buffer
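
In short, fetch collects every stored segment that overlaps the requested window, zero-pads any gap before, between, or after them, and concatenates the pieces into one buffer covering exactly [start_time, start_time + time_length). A minimal, self-contained sketch of the same idea on plain numpy arrays (an editorial illustration, not part of this commit; Seg and fetch_window are simplified stand-ins, not the become_yukarin API, and extra_time handling is omitted):

from typing import List, NamedTuple
import numpy


class Seg(NamedTuple):
    start_time: float
    data: numpy.ndarray  # samples
    rate: float          # samples per second

    @property
    def end_time(self):
        return self.start_time + len(self.data) / self.rate


def fetch_window(start_time: float, time_length: float, segments: List[Seg], rate: float):
    end_time = start_time + time_length
    pieces, cursor = [], start_time
    for seg in sorted(segments, key=lambda s: s.start_time):
        if seg.end_time <= start_time or seg.start_time >= end_time:
            continue  # segment lies entirely outside the window
        if seg.start_time > cursor:  # gap before this segment: pad with silence
            pieces.append(numpy.zeros(int((seg.start_time - cursor) * rate)))
            cursor = seg.start_time
        stop = min(seg.end_time, end_time)
        first = int((cursor - seg.start_time) * rate)
        last = int((stop - seg.start_time) * rate)
        pieces.append(seg.data[first:last])
        cursor = stop
    if cursor < end_time:  # trailing gap: pad with silence
        pieces.append(numpy.zeros(int((end_time - cursor) * rate)))
    return numpy.concatenate(pieces)


# One 0.5 s segment starting at 0.25 s; fetching [0, 1) s pads both sides.
seg = Seg(start_time=0.25, data=numpy.ones(4000), rate=8000)
window = fetch_window(0.0, 1.0, [seg], rate=8000)
print(window.shape)  # (8000,)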

def pre_convert(self, start_time: float, time_length: float, extra_time: float):
wave = self.fetch(
start_time=start_time,
time_length=time_length,
extra_time=extra_time,
data_stream=self._data_stream,
rate=self.sampling_rate,
pad_function=lambda length: numpy.zeros(shape=length, dtype=self.in_dtype),
pick_function=lambda segment, first, last: segment.wave.wave[first:last],
concat_function=numpy.concatenate,
)
in_wave = Wave(wave=wave, sampling_rate=self.sampling_rate)
in_feature = self.vocoder.encode(in_wave)

pad = int(extra_time / (self.vocoder.acoustic_feature_param.frame_period / 1000))
in_feature = in_feature.pick(pad, -pad)
return in_feature

def convert(self, start_time: float, time_length: float, extra_time: float):
sizes = AcousticFeature.get_sizes(sampling_rate=self.sampling_rate, order=self.order)
keys = ['f0', 'aperiodicity', 'mfcc', 'voiced']
in_feature = self.fetch(
start_time=start_time,
time_length=time_length,
extra_time=extra_time,
data_stream=self._in_feature_stream,
rate=1000 / self.frame_period,
pad_function=lambda length: AcousticFeature.silent(length, sizes=sizes, keys=keys),
pick_function=lambda segment, first, last: segment.feature.pick(first, last),
concat_function=lambda buffers: AcousticFeature.concatenate(buffers, keys=keys),
)
out_feature = self.voice_changer.convert_from_acoustic_feature(in_feature)

pad = int(extra_time * 1000 / self.frame_period)
out_feature = out_feature.pick(pad, -pad)
return out_feature
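
Both pre_convert and convert above fetch extra_time of context on each side of the requested span and then cut those context frames back off after conversion. A small, self-contained illustration of that bookkeeping (an editorial illustration, not part of this commit; the numbers are made up):

frame_period = 5.0  # ms per frame
time_length = 1.0   # seconds actually requested
extra_time = 0.1    # seconds of context on each side

frames_requested = int(time_length * 1000 / frame_period)                    # 200
frames_fetched = int((time_length + 2 * extra_time) * 1000 / frame_period)   # 240

pad = int(extra_time * 1000 / frame_period)  # 20 context frames per side
converted = list(range(frames_fetched))      # stand-in for the converted frames
trimmed = converted[pad:-pad]
assert len(trimmed) == frames_requested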

def post_convert(self, start_time: float, time_length: float):
sizes = AcousticFeature.get_sizes(sampling_rate=self.sampling_rate, order=self.order)
keys = ['f0', 'aperiodicity', 'spectrogram', 'voiced']
out_feature = self.fetch(
start_time=start_time,
time_length=time_length,
data_stream=self._out_feature_stream,
rate=1000 / self.frame_period,
pad_function=lambda length: AcousticFeature.silent(length, sizes=sizes, keys=keys),
pick_function=lambda segment, first, last: segment.feature.pick(first, last),
concat_function=lambda buffers: AcousticFeature.concatenate(buffers, keys=keys),
)

out_wave = self.vocoder.decode(
acoustic_feature=out_feature,
)
return out_wave


class VoiceChangerStreamWrapper(object):
def __init__(
self,
voice_changer_stream: VoiceChangerStream,
extra_time_pre: float = 0.0,
extra_time: float = 0.0,
):
self.voice_changer_stream = voice_changer_stream
self.extra_time_pre = extra_time_pre
self.extra_time = extra_time
self._current_time_pre = 0
self._current_time = 0
self._current_time_post = 0

def pre_convert_next(self, time_length: float):
in_feature = self.voice_changer_stream.pre_convert(
start_time=self._current_time_pre,
time_length=time_length,
extra_time=self.extra_time_pre,
)
self._current_time_pre += time_length
return in_feature

def convert_next(self, time_length: float):
out_feature = self.voice_changer_stream.convert(
start_time=self._current_time,
time_length=time_length,
extra_time=self.extra_time,
)
self._current_time += time_length
return out_feature

def post_convert_next(self, time_length: float):
out_wave = self.voice_changer_stream.post_convert(
start_time=self._current_time_post,
time_length=time_length,
)
self._current_time_post += time_length
return out_wave

def remove_previous(self):
end_time = min(
self._current_time_pre - self.extra_time_pre,
self._current_time - self.extra_time,
self._current_time_post,
)
self.voice_changer_stream.remove(end_time=end_time)
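
One note on remove_previous: each stage keeps its own cursor, and history may only be discarded up to whatever the slowest stage, minus the context it still needs, has already consumed. A tiny self-contained illustration (an editorial illustration, not part of this commit; the cursor values are made up):

extra_time_pre, extra_time = 0.2, 0.1
current_time_pre, current_time, current_time_post = 3.0, 2.0, 1.5

end_time = min(
    current_time_pre - extra_time_pre,  # 2.8: pre-conversion still needs data after 2.8 s
    current_time - extra_time,          # 1.9: conversion still needs features after 1.9 s
    current_time_post,                  # 1.5: synthesis has only reached 1.5 s
)
print(end_time)  # 1.5 -> segments ending at or before 1.5 s can be dropped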
