-
Notifications
You must be signed in to change notification settings - Fork 0
/
preprocessing.py
56 lines (51 loc) · 1.57 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#! /usr/bin/python
"""
Take the YouTube URL from the Django input, download the video's audio stream
and convert it into a wav file. Then compute the MFCC coefficients and
vector-quantize them against a k-means codebook to build the numerical
representation of the audio used by the HMMs.
"""
import pafy
import scipy.io.wavfile as wvf
import scipy.cluster.vq as sp
import numpy as np
import os
import re
import sys
from features import mfcc
def get_audio_from_video(url,wavfile="temp"):
    """
    Download the audio of a YouTube video and convert it into wav format.

    An m4a stream whose bitrate is reported as '128' is preferred; when no
    stream matches, the first m4a stream is used instead. The downloaded file
    is then converted with ffmpeg into ``<wavfile>.wav``.

    url     -- YouTube URL handed to pafy.new()
    wavfile -- output path WITHOUT the ".wav" extension (it is appended here)

    Returns None. Raises ValueError if the video offers no m4a audio stream.
    """
    # Local import: only this function needs it.
    import subprocess

    video = pafy.new(url)
    m4a_streams = [s for s in video.audiostreams if s.extension == "m4a"]
    if not m4a_streams:
        # Without an input file ffmpeg could only fail; report it clearly.
        raise ValueError("no m4a audio stream available for " + str(url))

    # NOTE(review): pafy may report bitrates as e.g. '128k', in which case this
    # comparison never matches and the fallback is always taken -- confirm.
    chosen = next((s for s in m4a_streams if s.bitrate == '128'),
                  m4a_streams[0])
    # Download exactly one stream (the original kept downloading every match).
    fname = chosen.download()

    # conversion of audio file format: ffmpeg -i audio.aac audio.wav
    # An argument list (no shell) keeps file names containing quotes, spaces,
    # ';' etc. from being interpreted by a shell; re.escape() is a regex
    # helper and does not provide shell quoting.
    # The exit status is ignored, as it was with os.system().
    subprocess.call(["ffmpeg", "-i", fname, wavfile + ".wav"])
def kmeans_Mfcc(codebook,wavfile="temp.wav",opfile="temp_vq.txt"):
    """
    Vector-quantize the MFCC features of a wav file and write the codes to a
    text file.

    The wav file is read, its MFCC matrix is computed, and every MFCC
    observation is mapped to the index of a codebook entry with
    scipy.cluster.vq.vq. The indices are written to opfile, one per line.

    codebook -- code vectors to quantize against, e.g. the first element of
                the result of scipy.cluster.vq.kmeans(mfcc_feat, 8)
    wavfile  -- path of the wav file to read
    opfile   -- path of the text file to write (opened in "w" mode)

    Returns opfile.
    """
    rate, sig = wvf.read(wavfile)
    mfcc_feat = mfcc(sig, rate)
    # vq() returns a pair; only its first element (the codes) is written out.
    codes = sp.vq(mfcc_feat, codebook)[0]
    # The context manager closes the file even if a write fails.
    with open(opfile, "w") as out:
        out.writelines(str(code) + "\n" for code in codes)
    return opfile
if __name__=="__main__":
    # Command line: <youtube_url> <wav_basename> <output_file>
    if len(sys.argv) < 4:
        sys.exit("usage: " + sys.argv[0] +
                 " <youtube_url> <wav_basename> <output_file>")
    wav_path = sys.argv[2] + ".wav"
    get_audio_from_video(sys.argv[1], sys.argv[2])
    # kmeans_Mfcc() expects the codebook as its FIRST argument; the previous
    # call passed the wav path there. Build an 8-entry codebook from this
    # file's own MFCCs (as the function used to do internally) and pass the
    # arguments in the right order.
    rate, sig = wvf.read(wav_path)
    codebook = sp.kmeans(mfcc(sig, rate), 8)[0]
    kmeans_Mfcc(codebook, wav_path, sys.argv[3])