-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
executable file
·91 lines (72 loc) · 3.22 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os.path
import numpy as np
from numpy.linalg import norm
import matplotlib.pyplot as plt
import scipy.sparse
from scipy.optimize import approx_fprime
def savefig(fname, verbose=True):
plt.tight_layout()
path = os.path.join('..', 'figs', fname)
plt.savefig(path)
if verbose:
print("Figure saved as '{}'".format(path))
def dijkstra(G, i=None, j=None):
'''Computes shortest distance between all pairs of nodes given an adjacency matrix G,
where G[i,j]=0 implies there is no edge from i to j.
Parameters
----------
G : an N by N numpy array
'''
dist = scipy.sparse.csgraph.dijkstra(G, directed=False)
if i is not None and j is not None:
return dist[i,j]
else:
return dist
def standardize_cols(X, mu=None, sigma=None):
# Standardize each column with mean 0 and variance 1
n_rows, n_cols = X.shape
if mu is None:
mu = np.mean(X, axis=0)
if sigma is None:
sigma = np.std(X, axis=0)
sigma[sigma < 1e-8] = 1.
return (X - mu) / sigma
def euclidean_dist_squared(X, Xtest):
"""Computes the Euclidean distance between rows of 'X' and rows of 'Xtest'
Parameters
----------
X : an N by D numpy array
Xtest: an T by D numpy array
Returns: an array of size N by T containing the pairwise squared Euclidean distances.
Python/Numpy (and other numerical languages like Matlab and R)
can be slow at executing operations in `for' loops, but allows extremely-fast
hardware-dependent vector and matrix operations. By taking advantage of SIMD registers and
multiple cores (and faster matrix-multiplication algorithms), vector and matrix operations in
Numpy will often be several times faster than if you implemented them yourself in a fast
language like C. The following code will form a matrix containing the squared Euclidean
distances between all training and test points. If the output is stored in D, then
element D[i,j] gives the squared Euclidean distance between training point
i and testing point j. It exploits the identity (a-b)^2 = a^2 + b^2 - 2ab.
The right-hand-side of the above is more amenable to vector/matrix operations.
"""
# add extra dimensions so that the function still works for X and/or Xtest are 1-D arrays.
if X.ndim == 1:
X = X[None]
if Xtest.ndim == 1:
Xtest = Xtest[None]
return np.sum(X**2, axis=1)[:,None] + np.sum(Xtest**2, axis=1)[None] - 2 * np.dot(X,Xtest.T)
def check_gradient(model, X, y, verbose=True, epsilon=1e-6):
# This checks that the gradient implementation is correct
w = np.random.randn(model.w.size)
f, g = model.funObj(w, X, y)
# Check the gradient
estimated_gradient = approx_fprime(w,
lambda w: model.funObj(w,X,y)[0],
epsilon=epsilon)
implemented_gradient = model.funObj(w, X, y)[1]
if np.max(np.abs(estimated_gradient - implemented_gradient))/np.linalg.norm(estimated_gradient) > 1e-6:
raise Exception('User and numerical derivatives differ:\n%s\n%s' %
(estimated_gradient[:5], implemented_gradient[:5]))
else:
if verbose:
print('User and numerical derivatives agree.')