forked from haugjo/fires
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fsds.py
76 lines (59 loc) · 2.37 KB
/
fsds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import numpy as np
import numpy.linalg as ln
class StreamFeatWeight:
"""
Streaming update of feature weights at time t
Input : Yt: m by nt matrix,
B : sketch matrix size m by l
Output : Feature importance score
"""
def __init__(self, m, k, l=0):
"""
m : no of features initially
k : no of singular vectors (this can be the same as
the number of clusters in the dataset)
l : sketch size for a sketched matrix B( m-by-l )
"""
self.m = m
self.k = k
if l < 1: self.l = int(np.sqrt(self.m))
else: self.l = l
def low_rank_approximation(self, Yt):
"""
Calculation of low rank approximation
sketched matrix B is updated on basis of new inputs at timestep t
:param Yt: m-by-nt input matrix from data stream
Yt is the data items introduced at time step t
output: weight of each feature
"""
#Step 1
# combine current sketched matrix with input at time t(Yt)
if hasattr(self, 'B'): #(object, name)
C = np.hstack((self.B, Yt)) # C is m by (n+l) matrix
n = Yt.shape[1] # it will be n_t
else:
# an initial sketch matrix for Y0
self.B = Yt[:, :self.l]
C = np.hstack((self.B, Yt[:, self.l:]))
n = Yt.shape[1] - self.l
# Step 2 :Singular value decomposition
U, s, V = ln.svd(C, full_matrices=False)
U = U[:, :self.l]
s = s[:self.l]
V = V[:, :self.l]
# Step 3 : shrink singular values in Frequent Directions algorithm
delta = s[-1] ** 2 # shrink values on the basis of squared
# smallest singlar value
s = np.sqrt(s ** 2 - delta)
# Step 4 : update sketched matrix B
self.B = np.dot(U, np.diag(s))
# In Section 5.1, for all experiments,
# the authors have set alpha = 2^3 * sigma_k based on the pre-experiment
alpha = (2 ** 3) * s[self.k-1]
# Step 5: solving the ridge regression by using
# the top-k singular values
D = np.diag(s[:self.k] / (s[:self.k] ** 2 + alpha))
#step 6: X: m by k matrix (k <= l)
X = np.dot(U[:, :self.k], D)
#step 7: returning maximum value of X
return np.amax(abs(X), axis=1)