estimate_priors.py
"""
If you use this code, please the SynthSR paper in:
https://github.com/BBillot/SynthSR/blob/master/bibtex.bib
Copyright 2020 Benjamin Billot
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is
distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied. See the License for the specific language governing permissions and limitations under the
License.
"""


# python imports
import os
import numpy as np
from scipy.stats import median_absolute_deviation

# third-party imports
from ext.lab2im import utils
from ext.lab2im import edit_volumes


def estimate_t2_cropping(image_dir, result_dir=None, dilation=5):
    """This function takes all the hippocampus images (with 2 channels) within the specified directory, and estimates
    the cropping dimensions around the hippocampus in the t2 channel.
    It returns the mean and std deviation for the minimal and maximal croppings, proportional to image size.
    :param image_dir: path of the folder containing hippocampus images
    :param result_dir: if not None, path of the folder where to write the computed statistics.
    :param dilation: dilation coefficient used to extract the full brain mask. Default is 5.
    :return t2_cropping_stats: numpy vector of size 4 [mean min crop, std min crop, mean max crop, std max crop]
    """
    # create result dir
    if result_dir is not None:
        utils.mkdir(result_dir)

    # loop through images
    list_image_paths = utils.list_images_in_folder(image_dir)
    max_cropping_proportions = np.zeros(len(list_image_paths))
    min_cropping_proportions = np.zeros(len(list_image_paths))
    loop_info = utils.LoopInfo(len(list_image_paths), 10, 'processing')
    for im_idx, image_path in enumerate(list_image_paths):
        loop_info.update(im_idx)

        # load t2 channel
        im = utils.load_volume(image_path)
        t2 = im[..., 1]
        shape = t2.shape
        hdim = int(np.argmax(shape))

        # mask image
        _, mask = edit_volumes.mask_volume(t2, threshold=0, dilate=dilation, return_mask=True)

        # find cropping indices
        indices = np.nonzero(mask)[hdim]
        min_cropping_proportions[im_idx] = np.maximum(np.min(indices) + int(dilation / 2), 0) / shape[hdim]
        max_cropping_proportions[im_idx] = np.minimum(np.max(indices) - int(dilation / 2), shape[hdim]) / shape[hdim]

    # compute stats
    t2_cropping_stats = np.array([np.mean(min_cropping_proportions),
                                  np.std(min_cropping_proportions),
                                  np.mean(max_cropping_proportions),
                                  np.std(max_cropping_proportions)])

    # save stats if necessary
    if result_dir is not None:
        np.save(os.path.join(result_dir, 't2_cropping_stats.npy'), t2_cropping_stats)

    return t2_cropping_stats
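

# Example usage (illustrative sketch only; the directory paths below are hypothetical placeholders,
# not part of the original file):
#
#     crop_stats = estimate_t2_cropping('/data/hippocampus/images', result_dir='/data/priors', dilation=5)
#     # crop_stats = [mean min crop, std min crop, mean max crop, std max crop], as proportions of image size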


def sample_intensity_stats_from_image(image, segmentation, labels_list, classes_list=None, keep_strictly_positive=True):
    """This function takes an image and a corresponding segmentation as inputs. It estimates the mean and std intensity
    for all specified label values. Labels can share the same statistics by being regrouped into K classes.
    :param image: image from which to evaluate mean intensity and std deviation.
    :param segmentation: segmentation of the input image. Must have the same size as image.
    :param labels_list: list of labels for which to evaluate mean and std intensity.
    Can be a sequence, a 1d numpy array, or the path to a 1d numpy array.
    :param classes_list: (optional) regroups structures into classes of similar intensity statistics.
    Intensities associated with regrouped labels will thus contribute to the same Gaussian during statistics estimation.
    Can be a sequence, a 1d numpy array, or the path to a 1d numpy array.
    It should have the same length as labels_list, and contain values between 0 and K-1, where K is the total number of
    classes. By default, all labels belong to different classes (K=len(labels_list)).
    :param keep_strictly_positive: (optional) whether to only keep strictly positive intensity values when
    computing stats. This doesn't apply to the first label in labels_list (or class if classes_list is provided), for
    which we keep positive and zero values, as we consider it to be the background label.
    :return: a numpy array of size (2, K), the first row being the mean intensity for each structure,
    and the second being the median absolute deviation (a robust estimate of the std deviation).
    """
    # reformat labels and classes
    labels_list = np.array(utils.reformat_to_list(labels_list, load_as_numpy=True, dtype='int'))
    if classes_list is not None:
        classes_list = np.array(utils.reformat_to_list(classes_list, load_as_numpy=True, dtype='int'))
    else:
        classes_list = np.arange(labels_list.shape[0])
    assert len(classes_list) == len(labels_list), 'labels and classes lists should have the same length'

    # get unique classes
    unique_classes, unique_indices = np.unique(classes_list, return_index=True)
    n_classes = len(unique_classes)
    if not np.array_equal(unique_classes, np.arange(n_classes)):
        raise ValueError('classes_list should only contain values between 0 and K-1, '
                         'where K is the total number of classes. Here K = %d' % n_classes)

    # compute mean/std of specified classes
    means = np.zeros(n_classes)
    stds = np.zeros(n_classes)
    for idx, tmp_class in enumerate(unique_classes):

        # get list of all intensity values for the current class
        class_labels = labels_list[classes_list == tmp_class]
        intensities = np.empty(0)
        for label in class_labels:
            tmp_intensities = image[segmentation == label]
            intensities = np.concatenate([intensities, tmp_intensities])
        if tmp_class:  # i.e. if not background
            if keep_strictly_positive:
                intensities = intensities[intensities > 0]

        # compute stats for class and put them to the location of corresponding label values
        if len(intensities) != 0:
            means[idx] = np.nanmedian(intensities)
            stds[idx] = median_absolute_deviation(intensities, nan_policy='omit')

    return np.stack([means, stds])
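

# Example usage (illustrative sketch only; the file paths, label values, and class values below are hypothetical,
# not part of the original file):
#
#     im = utils.load_volume('image.nii.gz')      # hypothetical image path
#     lab = utils.load_volume('labels.nii.gz')    # hypothetical label map path
#     stats = sample_intensity_stats_from_image(im, lab, labels_list=[0, 17, 53], classes_list=[0, 1, 1])
#     # stats[0] = per-class median intensities, stats[1] = per-class median absolute deviations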


def sample_intensity_stats_from_single_dataset(image_dir, labels_dir, labels_list, classes_list=None, max_channel=3,
                                               rescale=True):
    """This function estimates the intensity distributions of K different structure types from a set of images.
    The distribution of each structure type is modelled as a Gaussian, parametrised by a mean and a standard deviation.
    Because the intensity distribution of structures can vary across images, we additionally use Gaussian priors for
    the parameters of each Gaussian distribution. Therefore, the intensity distribution of each structure type is
    described by 4 parameters: a mean/std for the mean intensity, and a mean/std for the std deviation.
    This function uses a set of images along with corresponding segmentations to estimate the 4*K parameters.
    Structures can share the same statistics by being regrouped into classes of similar structure types.
    Images can be multi-modal (n_channels), in which case different statistics are estimated for each modality.
    :param image_dir: path of directory with images from which to estimate the intensity distributions
    :param labels_dir: path of directory with segmentations of the input images.
    They are matched with images by sorting order.
    :param labels_list: list of labels for which to evaluate mean and std intensity.
    Can be a sequence, a 1d numpy array, or the path to a 1d numpy array.
    :param classes_list: (optional) regroups structures into classes of similar intensity statistics.
    Intensities associated with regrouped labels will thus contribute to the same Gaussian during statistics estimation.
    Can be a sequence, a 1d numpy array, or the path to a 1d numpy array.
    It should have the same length as labels_list, and contain values between 0 and K-1, where K is the total number of
    classes. By default, all labels belong to different classes (K=len(labels_list)).
    :param max_channel: (optional) maximum number of channels to consider if the data is multispectral. Default is 3.
    :param rescale: (optional) whether to rescale images between 0 and 255 before intensity estimation.
    :return: 2 numpy arrays of size (2*n_channels, K), one with the evaluated means/stds for the mean
    intensity, and one with the means/stds for the standard deviation.
    Each block of two rows corresponds to a different modality (channel). Within each block of two rows, the first row
    represents the mean, and the second represents the std.
    """
    # list files
    path_images = utils.list_images_in_folder(image_dir)
    path_labels = utils.list_images_in_folder(labels_dir)
    assert len(path_images) == len(path_labels), 'image and labels folders do not have the same number of files'

    # reformat list labels and classes
    labels_list = np.array(utils.reformat_to_list(labels_list, load_as_numpy=True, dtype='int'))
    if classes_list is not None:
        classes_list = np.array(utils.reformat_to_list(classes_list, load_as_numpy=True, dtype='int'))
    else:
        classes_list = np.arange(labels_list.shape[0])
    assert len(classes_list) == len(labels_list), 'labels and classes lists should have the same length'

    # get unique classes
    unique_classes, unique_indices = np.unique(classes_list, return_index=True)
    n_classes = len(unique_classes)
    if not np.array_equal(unique_classes, np.arange(n_classes)):
        raise ValueError('classes_list should only contain values between 0 and K-1, '
                         'where K is the total number of classes. Here K = %d' % n_classes)

    # initialise result arrays
    n_dims, n_channels = utils.get_dims(utils.load_volume(path_images[0]).shape, max_channels=max_channel)
    means = np.zeros((len(path_images), n_classes, n_channels))
    stds = np.zeros((len(path_images), n_classes, n_channels))

    # loop over images
    loop_info = utils.LoopInfo(len(path_images), 10, 'estimating', print_time=True)
    for idx, (path_im, path_la) in enumerate(zip(path_images, path_labels)):
        loop_info.update(idx)

        # load image and label map
        image = utils.load_volume(path_im)
        la = utils.load_volume(path_la)
        if n_channels == 1:
            image = utils.add_axis(image, -1)

        # loop over channels
        for channel in range(n_channels):
            im = image[..., channel]
            if rescale:
                im = edit_volumes.rescale_volume(im)
            stats = sample_intensity_stats_from_image(im, la, labels_list, classes_list=classes_list)
            means[idx, :, channel] = stats[0, :]
            stds[idx, :, channel] = stats[1, :]

    # compute prior parameters for mean/std
    mean_means = np.mean(means, axis=0)
    std_means = np.std(means, axis=0)
    mean_stds = np.mean(stds, axis=0)
    std_stds = np.std(stds, axis=0)

    # regroup prior parameters in two different arrays: one for the mean and one for the std
    prior_means = np.zeros((2 * n_channels, n_classes))
    prior_stds = np.zeros((2 * n_channels, n_classes))
    for channel in range(n_channels):
        prior_means[2 * channel, :] = mean_means[:, channel]
        prior_means[2 * channel + 1, :] = std_means[:, channel]
        prior_stds[2 * channel, :] = mean_stds[:, channel]
        prior_stds[2 * channel + 1, :] = std_stds[:, channel]

    return prior_means, prior_stds
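

# Example usage (illustrative sketch only; the directory paths, label values, and class values are hypothetical
# placeholders, not part of the original file):
#
#     prior_means, prior_stds = sample_intensity_stats_from_single_dataset('/data/t1/images',
#                                                                          '/data/t1/labels',
#                                                                          labels_list=[0, 17, 53],
#                                                                          classes_list=[0, 1, 1])
#     # prior_means[0] / prior_means[1]: mean / std of the class-wise mean intensities for the first channel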


def build_intensity_stats(list_image_dir,
                          list_labels_dir,
                          result_dir,
                          estimation_labels,
                          estimation_classes=None,
                          max_channel=3,
                          rescale=True):
    """This function estimates the intensity distributions of K different structure types from a set of images.
    The distribution of each structure type is modelled as a Gaussian, parametrised by a mean and a standard deviation.
    Because the intensity distribution of structures can vary across images, we additionally use Gaussian priors for
    the parameters of each Gaussian distribution. Therefore, the intensity distribution of each structure type is
    described by 4 parameters: a mean/std for the mean intensity, and a mean/std for the std deviation.
    This function uses a set of images along with corresponding segmentations to estimate the 4*K parameters.
    Additionally, it can estimate the 4*K parameters for several image datasets, which we call here n_datasets.
    This function writes 2 numpy arrays of size (2*n_datasets, K), one with the evaluated means/stds for the mean
    intensities, and one with the means/stds for the standard deviations.
    In these arrays, each block of two rows refers to a different dataset.
    Within each block of two rows, the first row represents the mean, and the second represents the std.
    SEE SCRIPTS/TUTORIALS/8-ESTIMATE_PRIORS.PY FOR EXAMPLES OF HOW TO USE THIS FUNCTION
    :param list_image_dir: path of folders with images for intensity distribution estimation.
    Can be the path of a single directory (n_datasets=1), or a list of folders, each being a separate dataset.
    Images can be multimodal, in which case each modality is treated as a different dataset, i.e. each modality will
    have a separate block (of size (2, K)) in the result arrays.
    :param list_labels_dir: path of folders with label maps corresponding to the input images.
    If list_image_dir is a list of several folders, list_labels_dir can either be a list of folders (one for each image
    folder), or the path to a single folder, which will be used for all datasets.
    If a dataset has multi-modal images, the same label map is applied to all modalities.
    :param result_dir: path of directory where the estimated priors will be written.
    :param estimation_labels: labels to estimate intensity statistics from.
    Can be a sequence, a 1d numpy array, or the path to a 1d numpy array.
    :param estimation_classes: (optional) regroups structures into classes of similar intensity statistics.
    Intensities associated with regrouped labels will thus contribute to the same Gaussian during statistics estimation.
    Can be a sequence, a 1d numpy array, or the path to a 1d numpy array.
    It should have the same length as estimation_labels, and contain values between 0 and K-1, where K is the total
    number of classes. By default, all labels belong to different classes (K=len(estimation_labels)).
    :param max_channel: (optional) maximum number of channels to consider if the data is multispectral. Default is 3.
    :param rescale: (optional) whether to rescale images between 0 and 255 before intensity estimation.
    """
    # handle results directories
    utils.mkdir(result_dir)

    # reformat image/labels dir into lists
    list_image_dir = utils.reformat_to_list(list_image_dir)
    list_labels_dir = utils.reformat_to_list(list_labels_dir, length=len(list_image_dir))

    # reformat list estimation labels and classes
    estimation_labels = np.array(utils.reformat_to_list(estimation_labels, load_as_numpy=True, dtype='int'))
    if estimation_classes is not None:
        estimation_classes = np.array(utils.reformat_to_list(estimation_classes, load_as_numpy=True, dtype='int'))
    else:
        estimation_classes = np.arange(estimation_labels.shape[0])
    assert len(estimation_classes) == len(estimation_labels), 'estimation labels and classes should be of same length'

    # get unique classes
    unique_estimation_classes, unique_indices = np.unique(estimation_classes, return_index=True)
    n_classes = len(unique_estimation_classes)
    if not np.array_equal(unique_estimation_classes, np.arange(n_classes)):
        raise ValueError('estimation_classes should only contain values between 0 and K-1, '
                         'where K is the total number of classes. Here K = %d' % n_classes)

    # loop over datasets
    list_datasets_prior_means = list()
    list_datasets_prior_stds = list()
    for image_dir, labels_dir in zip(list_image_dir, list_labels_dir):

        # get prior stats for dataset
        tmp_prior_means, tmp_prior_stds = sample_intensity_stats_from_single_dataset(image_dir,
                                                                                     labels_dir,
                                                                                     estimation_labels,
                                                                                     estimation_classes,
                                                                                     max_channel=max_channel,
                                                                                     rescale=rescale)

        # add stats arrays to list of dataset-wise statistics
        list_datasets_prior_means.append(tmp_prior_means)
        list_datasets_prior_stds.append(tmp_prior_stds)

    # stack all datasets together
    prior_means = np.concatenate(list_datasets_prior_means, axis=0)
    prior_stds = np.concatenate(list_datasets_prior_stds, axis=0)

    # save files
    np.save(os.path.join(result_dir, 'prior_means.npy'), prior_means)
    np.save(os.path.join(result_dir, 'prior_stds.npy'), prior_stds)

    return prior_means, prior_stds
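

# Example usage (illustrative sketch only, mirroring the multi-dataset pattern described in the docstring above;
# all paths and label/class values below are hypothetical placeholders, not part of the original file):
#
#     build_intensity_stats(list_image_dir=['/data/t1/images', '/data/t2/images'],
#                           list_labels_dir='/data/labels',
#                           result_dir='/data/priors',
#                           estimation_labels=[0, 17, 53],
#                           estimation_classes=[0, 1, 1])
#     # writes prior_means.npy and prior_stds.npy, each of size (2 * n_datasets, K), to result_dir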