JKnoisecancel.py
from scipy import signal
from scipy.fftpack import fft
import librosa
import numpy as np
from scipy.io.wavfile import read, write
import scipy
from scipy.ndimage import maximum_filter1d
import urllib.parse
import requests
import json
import minus
url = "https://procon33-practice.kosen.work"
filepath = "./problem"
token = open("token.txt").read()
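# Overview of the spectral-gating noise cancellation implemented below:
#   1. STFT a known noise clip (read from ./JKspeech/) and derive a per-frequency threshold
#      of mean + n_std_thresh * std of its dB magnitudes.
#   2. STFT the source recording, mark every time-frequency bin below that threshold as noise,
#      smooth the resulting mask over time and frequency, and attenuate the marked bins toward
#      the minimum dB level of the signal.
#   3. Invert the STFT and write the result back under ./processing/.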
def wav_read(path):  # read an audio file
    wave, fs = librosa.core.load(path, mono=True)
    return wave, fs
def calc_fft(data, fs):  # compute the FFT
    frq = np.linspace(0, fs, len(data))
    yf = fft(data) / (len(data) / 2)
    return np.abs(yf), frq
def envelope(y, rate, threshold):
    y_mean = maximum_filter1d(np.abs(y), mode="constant", size=rate // 20)
    mask = [mean > threshold for mean in y_mean]
    return mask, y_mean
def _db_to_amp(x):
    return librosa.core.db_to_amplitude(x, ref=1.0)
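# Usage sketch for the envelope helper above (the threshold value is hypothetical): build a
# boolean mask that keeps samples whose local peak amplitude (max-filtered over ~1/20 s) exceeds 0.01:
# mask, y_mean = envelope(wave, fs, threshold=0.01)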
n_fft = 2048        # FFT size (samples per STFT frame)
hop_length = 512    # number of audio samples between adjacent STFT columns
win_length = 2048   # window size
n_std_thresh = 1.0  # how many standard deviations above the mean noise level (dB, per frequency bin) a value must be to count as signal
sample_rate = 22050
fs = 22050
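# With these settings each analysis window covers win_length / sample_rate = 2048 / 22050 ≈ 93 ms
# of audio, and consecutive STFT columns are hop_length / sample_rate = 512 / 22050 ≈ 23 ms apart.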
def _stft(y, n_fft, hop_length, win_length):
    return librosa.stft(y=y, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
def _amp_to_db(x):
    return librosa.core.amplitude_to_db(x, ref=1.0, amin=1e-20, top_db=80.0)
def _istft(y, hop_length, win_length):
    return librosa.istft(y, hop_length=hop_length, win_length=win_length)
def noise_cancel(sourceAudio, noise, noisename):
    sourceAudio1 = "./processing/" + sourceAudio + "/" + noisename + ".wav"
    noise1 = "./JKspeech/" + noise + ".wav"
    audio_clip, fs = wav_read(sourceAudio1)
    noise_clip, fs = wav_read(noise1)
    noise = noise.replace("./JKspeech/", "")
    path = sourceAudio1.split('/')
    noise_stft = _stft(noise_clip, n_fft, hop_length, win_length)
    noise_stft_db = _amp_to_db(np.abs(noise_stft))  # convert to dB
    mean_freq_noise = np.mean(noise_stft_db, axis=1)
    std_freq_noise = np.std(noise_stft_db, axis=1)
    noise_thresh = mean_freq_noise + std_freq_noise * n_std_thresh
    n_grad_freq = 2    # number of frequency channels to smooth the mask over
    n_grad_time = 4    # number of time channels to smooth the mask over
    prop_decrease = 1  # how much to reduce the noise
    # extract features from the source audio with the STFT as well
    sig_stft = _stft(audio_clip, n_fft, hop_length, win_length)
    sig_stft_db = _amp_to_db(np.abs(sig_stft))
    # build a smoothing filter for the mask in time and frequency
    smoothing_filter = np.outer(
        np.concatenate(
            [
                np.linspace(0, 1, n_grad_freq + 1, endpoint=False),
                np.linspace(1, 0, n_grad_freq + 2),
            ]
        )[1:-1],
        np.concatenate(
            [
                np.linspace(0, 1, n_grad_time + 1, endpoint=False),
                np.linspace(1, 0, n_grad_time + 2),
            ]
        )[1:-1],
    )
    smoothing_filter = smoothing_filter / np.sum(smoothing_filter)
    # compute the threshold over time and frequency
    db_thresh = np.repeat(
        np.reshape(noise_thresh, [1, len(mean_freq_noise)]),
        np.shape(sig_stft_db)[1],
        axis=0,
    ).T
    sig_mask = sig_stft_db < db_thresh
    sig_mask = scipy.signal.fftconvolve(sig_mask, smoothing_filter, mode="same")
    sig_mask = sig_mask * prop_decrease
    mask_gain_dB = np.min(_amp_to_db(np.abs(sig_stft)))
    sig_stft_db_masked = (
        sig_stft_db * (1 - sig_mask)
        + np.ones(np.shape(mask_gain_dB)) * mask_gain_dB * sig_mask
    )
    sig_imag_masked = np.imag(sig_stft) * (1 - sig_mask)
    sig_stft_amp = (_db_to_amp(sig_stft_db_masked) * np.sign(sig_stft)) + (1j * sig_imag_masked)
    recovered_signal = _istft(sig_stft_amp, hop_length, win_length)
    recovered_signal = recovered_signal.astype(np.float32)
    noisename = noise + noisename + "noisecan"
    write("{}/{}/{}/{}.wav".format(path[0], path[1], path[2], noisename), rate=fs, data=recovered_signal)
    return noisename
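# Minimal usage sketch (hypothetical names: recording folder "sample1", take "take1", and
# JKspeech card "J01" as the noise source). It assumes ./processing/sample1/take1.wav and
# ./JKspeech/J01.wav exist, and would write ./processing/sample1/J01take1noisecan.wav:
# out_name = noise_cancel("sample1", "J01", "take1")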
endpoint = urllib.parse.urljoin(url, "problem")
res = requests.get(endpoint, headers={"procon-token": token})
res = json.loads(res.text)
name = res["id"]
noisename = name
while True:
    noise = input("消す音源:")  # prompt: "audio source to remove"; the first character is the language tag, the second the kana to look up
    lang = noise[0]
    noise = noise[1]
    nmusic = ""
    lan = "あいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわ"
    for i in range(len(lan)):
        for j in noise:
            if j == lan[i]:
                nmusic = str(i + 1).zfill(2)
                break
    name = minus.minus(nmusic)