-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFeatureExtractor.py
121 lines (100 loc) · 5.14 KB
/
FeatureExtractor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import numpy as np
import pandas as pd
import progressbar as pb
import datetime
import Quaternions
import DataLoader
#
# Helper functions for extracting features from dataset using sliding window
#
#freqbins = [ (0.0,0.5) , (0.5,1.0) , (1.0,2.0) , (2.0,4.0) , (4.0,8.0) , (8.0,16.001) ]
#freqbins = [ (0.0,0.8) , (0.8,2.0) , (2.0,4.0) , (4.0,8.0) , (8.0,16.001) ]
freqbins = [ (0.0,0.5) , (0.5,3.0) , (3.0,8.0) , (8.0,16.0) ]
def limbfeatures( df , N=128 , freqbins = [ (0.0,0.5) , (0.5,3.0) , (3.0,8.0) , (8.0,16.0) ]):
df_features = df.copy()
df_features.set_index( 'DATETIME' , inplace=True )
# Fill in the missing times
# I have to use this stupid method filling in 1s at a time because the timedelta
# is irrational. Loop over series in 1 second intervals. For each interval append all the irrational times in between...
timeseries = None
for t in pd.date_range(df_features.index.min().floor('s'),df_features.index.max().ceil('s'),freq=datetime.timedelta(seconds=1.0)):
tmp = pd.date_range( t , t+datetime.timedelta(milliseconds=999.0) , freq=datetime.timedelta(milliseconds=1000.0/float(DataLoader.FREQ)) )
if timeseries is None:
timeseries = tmp
else:
timeseries = timeseries.append( tmp )
df_timeseries = pd.DataFrame( index=timeseries )
df_features = df_timeseries.join( df_features , how='left' , sort=True )
# Compute a bunch of window features with 5s window
# N = 128
#for fname in ['Q0','Q1','Q2','Q3','AMAG','ATRAN','AZ','PHI','THETA','PSI','AZQ','ATRANQ','XYANGLE']:
#for fname in [ 'AMAG' , 'AZ' , 'ATRAN' , 'AZQ' , 'ATRANQ' , 'XYANGLE' ]:
for fname in ['AMAG','AZ','ATRAN','XYANGLE']:
print 'Computing features for', fname, N
df_features = add_window_features( df_features , fname , N , freqbins )
# Create a Sum(fft) summing over all quaternions
#for w in range(len(freqbins)):
# c = [ 'FFTP_W%i_%s_%i'%(w,s,N) for s in ['Q0','Q1','Q2','Q3'] ]
# df_features['FFTP_W%i_QSUM_%i'%(w,N)] = df_features[c].sum(axis=1)
# df_features['FFTP_W%i_QMAX_%i'%(w,N)] = df_features[c].max(axis=1)
#for w in range(len(freqbins)):
# c = [ 'FFTP_W%i_%s_%i'%(w,s,N) for s in ['PHI','THETA','PSI'] ]
# df_features['FFTP_W%i_EULERSUM_%i'%(w,N)] = df_features[c].sum(axis=1)
# df_features['FFTP_W%i_EULERMAX_%i'%(w,N)] = df_features[c].max(axis=1)
# Psi has some long period oscillations when people walk,
# so lets also grab a 15 second window
#N = 480
#for fname in ['PSI']:
# print 'Rolling features for', fname, N
# df_features = add_window_features( df_features , fname , N , freqbins )
return df_features
def add_window_features( df , fname , N , freqbins ):
# Determine indices of bin boundaries
freqiters = get_freqiters( freqbins , N )
# Create a rolling window object
roll = df[fname].rolling( min_periods=N , window=N , center=True )
# All the dirty work here
df['MEAN_%s_%i'%(fname,N)] = roll.mean()
df['STD_%s_%i'%(fname,N)] = roll.std()
df['MAX_%s_%i'%(fname,N)] = roll.max()
df['MIN_%s_%i'%(fname,N)] = roll.min()
df['MED_%s_%i'%(fname,N)] = roll.median()
df['RANGE_%s_%i'%(fname,N)] = roll.apply( lambda x: x.max()-x.min() )
df['SKEW_%s_%i'%(fname,N)] = roll.skew()
#df['SUM_%s_%i'%(fname,N)] = roll.sum() / float(N)
for ifreq,freq in enumerate(freqiters):
df['FFTP_W%i_%s_%i'%(ifreq,fname,N)] = roll.apply( lambda x: np.abs(np.fft.rfft(x))[freq[0]].sum() / freq[1] )
return df
def zero_crosses( s ):
n = 0
for i in s.index:
pass
# FIXME
return n
def get_freqiters( bins , N=160 ):
freq = np.fft.rfftfreq(N,d=1.0/float(DataLoader.FREQ))[1:]
freqiters = []
for ib,b in enumerate(bins):
# determine all of the freq indices inside each bin
inbin = []
for ix,x in enumerate(freq):
if x >= b[0] and x < b[1]:
inbin += [ ix+1 ] # plus one because freq index starts with 1 !!
# Create a slice with the max and the min
lo, hi = min(inbin), max(inbin)
freqiters += [ (slice(lo,hi),hi-lo) ]
return freqiters
def doctor_raw_data( df ):
# Ensure that all quaternions are unit-normalized
Quaternions.normalize_df( df )
# Compute acceleration magnitudes and Euler angles
df['AMAG'] = np.sqrt( np.power(df['AX'],2) + np.power(df['AY'],2) + np.power(df['AZ'],2) )
df['ATRAN'] = np.sqrt( np.power(df['AX'],2) + np.power(df['AY'],2) )
df['PHI'] = Quaternions.get_phi( df['Q0'] , df['Q1'] , df['Q2'] , df['Q3'] )
df['THETA'] = Quaternions.get_theta( df['Q0'] , df['Q1'] , df['Q2'] , df['Q3'] )
df['PSI'] = Quaternions.get_psi( df['Q0'] , df['Q1'] , df['Q2'] , df['Q3'] )
# Compute accelerations rotated into lab reference frame
df['AXQ'], df['AYQ'], df['AZQ'] = Quaternions.qv_mult( (df['Q0'],df['Q1'],df['Q2'],df['Q3']) , (df['AX'],df['AY'],df['AZ']) )
df['ATRANQ'] = np.sqrt( np.power(df['AXQ'],2) + np.power(df['AYQ'],2) )
# Compute pendulum angle w.r.t. x-y plane (in lab frame)
df['XYANGLE'] = Quaternions.get_xyangle( df['Q0'] , df['Q1'] , df['Q2'] , df['Q3'] )