from __future__ import division
import numpy as np
from matplotlib.pyplot import hist
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import dataset
import nibabel as nib


def get_search_data(split_ratio=1, include_fit_data=False):
    """
    Method to load and split param_eval data
    @param split_ratio: fraction of the data to place in the training split, the rest is returned as test data
    @param include_fit_data: if true then include all data in data/gen as well, i.e. fitting data
    @return: (X_train, y_train, X_test, y_test)
    """
    # Load newly generated search data
    train, _, _ = dataset.load_dataset(288, './data/search/', split_ratio=(1, 0, 0))
    X = train[0]
    y = train[1][:, 0].reshape(-1, 1)
    if include_fit_data:
        train, _, _ = dataset.load_dataset(288, './data/gen/', split_ratio=(1, 0, 0))
        X = np.concatenate((train[0], X))
        y = np.concatenate((train[1][:, 0].reshape(-1, 1), y))
    split = int(X.shape[0] * split_ratio)
    indices = np.random.permutation(X.shape[0])
    training_idx, test_idx = indices[:split], indices[split:]
    return X[training_idx, :], y[training_idx, :], X[test_idx, :], y[test_idx, :]
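
# Hypothetical usage sketch (the 0.8 ratio below is an assumption, not a project default):
#   X_train, y_train, X_test, y_test = get_search_data(split_ratio=0.8)
#   print(X_train.shape, y_train.shape)  # e.g. (n_train, 288) and (n_train, 1)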


def get_hpc_data(filename='./data/hpc/50000_scanned_voxels.Bfloat', sample_size=None):
    """
    Helper method for loading HPC data from disk
    @param filename: a string specifying the path to the binary HPC data
    @param sample_size: number, only return a sample of specified size
    @return: an (n_voxels, 288) array of voxel data
    """
    arr = to_voxels(read_float(filename))
    if sample_size is not None:
        np.random.shuffle(arr)
        return arr[0:sample_size, :]
    return arr
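
# Hypothetical usage sketch (assumes the default .Bfloat file exists on disk):
#   voxels = get_hpc_data(sample_size=1000)  # random sample of 1000 voxels, shape (1000, 288)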


def load_nib_data(filename, sample_size=None):
    """
    Helper method for loading nib data from disk
    @param filename: the filename to the nib file
    @param sample_size: None if all data should be returned, or a number if a random sample should be taken
    @return: The data in a 4d matrix (x, y, z, d)
    """
    img = nib.load(filename)
    data = img.get_data()
    if sample_size is not None:
        np.random.shuffle(data)
        return data[0:sample_size, :, :, :]
    return data
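
# Hypothetical usage sketch ('scan.nii.gz' is an assumed filename; shuffling and slicing
# happen along the first axis, as in the implementation above):
#   volume = load_nib_data('scan.nii.gz')
#   sample = load_nib_data('scan.nii.gz', sample_size=10)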


def read_float(filename):
    """
    Helper method for reading a binary float file
    @param filename: the filename to read
    @return: an array of the floats in the binary file
    """
    # Open in binary mode; the data is stored as big-endian 32-bit floats
    with open(filename, "rb") as f:
        arr = np.fromfile(f, dtype='>f4')
    return arr


def filter_zeros(X):
    """
    Helper method to filter zeros
    @param X: The matrix to filter out zeros from
    @return: The matrix with all-zero rows filtered out
    """
    noNonzeros = np.count_nonzero(X, axis=1)
    mask = np.where(noNonzeros > 0)
    return X[mask[0], :]


def to_voxels(arr, channels=288):
    """
    Helper method to convert a 1D-array to voxel arranged data
    @param arr: the 1D array to convert
    @param channels: number of DWIs, i.e. the number of channels / features in the data
    @return: an (n_samples, channels) array
    """
    no_samples = int(arr.size / channels)
    return np.reshape(arr, (no_samples, channels))
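
# Hypothetical pipeline sketch combining the helpers above (the path is the same one
# get_hpc_data uses by default, and is an assumption about the local data layout):
#   raw = read_float('./data/hpc/50000_scanned_voxels.Bfloat')
#   voxels = filter_zeros(to_voxels(raw, channels=288))  # (n_voxels, 288) with all-zero rows dropped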


def diff_plot(targets, predictions, filename, remove_outliers=False):
    """
    Method that creates and saves a scatter plot of targets vs predictions
    @param targets: the targets array
    @param predictions: the predictions array
    @param filename: the filename of where to save the plot
    @param remove_outliers: if outliers should be removed from plotting (to get better scale)
    @return: nothing
    """
    if remove_outliers:
        indices = np.where(np.logical_not(np.logical_or(np.abs(predictions) > 10 * np.abs(targets),
                                                        np.abs(predictions) < np.abs(targets) / 10.0)))
        targets = targets[indices]
        predictions = predictions[indices]
    if targets.shape[0] != 0:
        fig, ax = plt.subplots()
        fig.suptitle(str(targets.shape[0]) + ' samples, R2: ' + str(r2(targets, predictions)), fontsize=12)
        axes = plt.gca()
        axes.set_ylim(np.min(predictions), np.max(predictions))
        axes.set_xlim(np.min(targets), np.max(targets))
        ax.scatter(targets, predictions, edgecolors=(0, 0, 0))
        ax.set_xlabel('Targets')
        ax.set_ylabel('Predictions')
        ax.plot([targets.min(), targets.max()], [targets.min(), targets.max()], 'k--', lw=4)
        plt.savefig(filename)
        plt.close()
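
# Hypothetical usage sketch (y_true and y_pred are assumed 1-D arrays of equal length):
#   diff_plot(y_true, y_pred, 'diff_plot.png', remove_outliers=True)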


def loss_plot(train_loss, val_loss, filename):
    """
    Method that creates and saves a loss plot
    @param train_loss: array or list of the training loss
    @param val_loss: array or list of the validation loss
    @param filename: the filename of where to save the plot
    @return: nothing
    """
    plt.plot(train_loss)
    plt.plot(val_loss)
    plt.ylabel('Loss')
    plt.xlabel('Epochs')
    plt.legend(['Train', 'Val'], loc='upper right')
    plt.savefig(filename)
    plt.close()
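
# Hypothetical usage sketch (the loss lists would come from whatever training loop is used):
#   loss_plot([1.2, 0.8, 0.5], [1.3, 0.9, 0.7], 'loss.png')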


def residual_plot(targets, predictions, filename):
    """
    Method that creates and saves a residual plot
    @param targets: the targets array
    @param predictions: the predictions array
    @param filename: the filename of where to save the plot
    @return: nothing
    """
    fig, ax = plt.subplots()
    fig.suptitle(str(targets.shape[0]) + ' samples, Residual Plot', fontsize=12)
    residuals = targets - predictions
    axes = plt.gca()
    axes.set_ylim(np.min(residuals), np.max(residuals))
    axes.set_xlim(np.min(predictions), np.max(predictions))
    ax.scatter(predictions, residuals, edgecolors=(0, 0, 0))
    ax.set_xlabel('Predictions')
    ax.set_ylabel('Residuals')
    plt.savefig(filename)
    plt.close()


def heat_plot(matrix, filename, xTicks, yTicks, xLabel='X', yLabel='Y'):
    """
    Method that creates and saves a heat plot between two hyperparameters
    @param matrix: a matrix of r2 scores
    @param filename: the filename and directory of where to save the plot
    @param xTicks: the ticks to use on x-axis
    @param yTicks: the ticks to use on y-axis
    @param xLabel: the label describing the data in the x-axis
    @param yLabel: the label describing the data in the y-axis
    @return: nothing
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(matrix, vmin=0, vmax=1)
    fig.colorbar(cax)
    ticks = np.arange(0, matrix.shape[0], 1)
    ax.set_xticks(ticks)
    ax.set_yticks(ticks)
    ax.set_xticklabels(xTicks)
    ax.set_yticklabels(yTicks)
    ax.set_xlabel(xLabel)
    ax.set_ylabel(yLabel)
    plt.savefig(filename)
    plt.close()
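
# Hypothetical usage sketch: since the same ticks array is used for both axes, the matrix is
# expected to be square (the hyperparameter names and values below are made up):
#   scores = np.array([[0.10, 0.55], [0.70, 0.92]])
#   heat_plot(scores, 'heat.png', xTicks=[32, 64], yTicks=[0.001, 0.01],
#             xLabel='Batch size', yLabel='Learning rate')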


def model_comp_plot(id_model_list, filename):
    """
    Creates and saves a plot of model id vs MSE
    @param id_model_list: a list of dicts holding the mse for each model id
    @param filename: the filename and directory where the plot should be saved
    @return: nothing
    """
    axes = plt.gca()
    axes.set_ylim(0, 10 * np.median([k['mse'] for k in id_model_list]))
    plt.plot([k['id'] for k in id_model_list], [k['mse'] for k in id_model_list], 'bo')
    plt.ylabel('Validation MSE')
    plt.xlabel('Model ID')
    plt.savefig(filename)
    plt.close()
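
# Hypothetical usage sketch showing the dict keys the function expects ('id' and 'mse'):
#   results = [{'id': 0, 'mse': 0.12}, {'id': 1, 'mse': 0.08}, {'id': 2, 'mse': 0.31}]
#   model_comp_plot(results, 'model_comparison.png')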


def plot_features(inputs, nbins=50):
    """
    Helper method for plotting each feature in a histogram
    @param inputs: the inputs to plot
    @param nbins: the number of bins to use in each histogram
    @return: nothing but shows a plot
    """
    for i in range(0, inputs.shape[1]):
        x = inputs[:, i]
        n, bins, patches = hist(x, bins=nbins, range=None, rwidth=0.8, normed=False, weights=None, cumulative=False, bottom=None)
        print('BINS: ' + str(bins))
        print('N: ' + str(n))
        plt.show()


def plot_targets(targets):
    """
    Helper method for plotting the targets in a histogram
    @param targets: the targets to plot
    @return: nothing but shows a plot
    """
    hist(targets, bins='auto', range=None, normed=False, weights=None, cumulative=False, bottom=None)
    plt.show()


def r2(t, y):
    """
    Method that computes R2 score
    @param t: targets array
    @param y: predictions array
    @return: the r2 score between t and y
    """
    return r2_score(t, y)


def mae(t, y):
    """
    Method that computes the mean absolute error
    @param t: targets array
    @param y: predictions array
    @return: the mae between t and y
    """
    return mean_absolute_error(t, y)


def mse(t, y, rmse=False):
    """
    Method that computes the mean squared error
    @param t: targets array
    @param y: predictions array
    @param rmse: boolean, if true return the root MSE
    @return: the mean squared error between t and y
    """
    mse = mean_squared_error(t, y)
    if rmse:
        mse = mse ** 0.5
    return mse
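
# Hypothetical usage sketch of the metric helpers (the arrays are made up):
#   t = np.array([1.0, 2.0, 3.0])
#   y = np.array([1.1, 1.9, 3.2])
#   print(r2(t, y), mae(t, y), mse(t, y), mse(t, y, rmse=True))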


def print_and_append(string, outfile, new_line=False):
    """
    Helper method for both printing and appending to an output file
    @param string: the string to append
    @param outfile: the file to append to, or None to only print
    @param new_line: if True, write an extra blank line after the string
    @return: nothing
    """
    if outfile is not None:
        outfile.write(string)
        outfile.write('\n')
        if new_line:
            outfile.write('\n')
    print(string)
    if new_line:
        print('\n')
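
# Hypothetical usage sketch ('results.txt' is an assumed log file name):
#   with open('results.txt', 'a') as f:
#       print_and_append('epoch 1: mse=0.12', f, new_line=True)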