-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfnirs.py
383 lines (318 loc) · 15.4 KB
/
fnirs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 15 15:02:53 2021
@author: Leon D. Lotter
"""
import pandas as pd
import numpy as np
from atlasreader import get_statmap_info
from scipy.spatial import KDTree
from statsmodels.stats.multitest import fdrcorrection
from nimare.utils import mm2vox, vox2mm
from nimare.stats import null_to_p
from nilearn.image import load_img, get_data, new_img_like, math_img
from tqdm.auto import tqdm
import logging
lgr = logging.getLogger(__name__)
lgr.setLevel(logging.INFO)
def fnirs_result(data, atlas, labels, publications=None, volumes=True, verbose=True):
"""
Evaluates channel-wise MNI coordinates from fNIRS studies by a given whole-
brain parcellation. For each coordinate, the corresponding parcel is found.
For each parcel, a summary of the number of "INS"-coordinates, "non-INS"-
coordinates and associated study information is created. Additionally, the
parcel described anatomically via atlasreader using the AAL atlas.
If a coordinate is not located within the atlas, it is replaced by the
nearest coordinate within the atlas using scipy.spatial.KDTree.
Input:
data = dataframe with the following columns: 'publication': study name,
'n': number of subjects, 'sync': binary indicator of study
result, 'x', 'y', 'z': MNI coordinates
atlas = parcellation volume
labels = dataframe with atlas labels corresponding to atlas indices
with following columns: 'idx': parcel indices, 'label': parcel name
publications = list of publications to include, if None -> all
Output:
Dataframe with parcel-wise results
Dict of nifti volumes, 1: number of INS channels per parcel, 2: ratio of
INS to all channels per parcel, 3: ratio of INS to all channels per
parcel multiplied by all subjects that "contributed" to the parcel as
an (arbitrary) index of convergence
Dataframe with input study information extended by nearest coordinates
(real-world, 'i_','j_','k_') and distance ('dist') to original
coordinates ('i','j','k'). If distance == 0, nearest == original
"""
if verbose==False:
lgr.setLevel(logging.ERROR)
# fNIRS DATA
if type(data)==str:
data = pd.read_csv(data)
df = data[['publication', 'n', 'sync', 'x', 'y', 'z']]
# USER-DEFINED publications TO EXCLUDE?
if publications is not None:
df = df[df.publication.isin(publications)]
# EXCLUDE ROWS WITH MISSING COORDINATE DATA
df = df[~df['x'].isna()]
lgr.info(f'Loaded fNIRS data, kept {len(df)} rows with coordinate information '
f'from {len(df.publication.unique())} experiments.')
# check that relevant cols are numeric
df[['n', 'sync', 'x', 'y', 'z']] = df[['n', 'sync', 'x', 'y', 'z']].apply(pd.to_numeric, axis = 1)
# ATLAS
atlas = load_img(atlas)
a_dat = get_data(atlas) # data matrix
# TRANSFORM COORDINATES TO WORLD SPACE
df.loc[:, ['i', 'j', 'k']] = mm2vox(df[['x', 'y', 'z']], atlas.affine)
# FIND NEAREST NEIGHBOR WITHIN ATLAS IF FNIRS COORDINATE NOT IN ATLAS
ijk = np.argwhere(a_dat > 0) # get indices of non-zero atlas points
kdtree = KDTree(ijk) # prepare KDTree class
dist, points = kdtree.query(df[['i', 'j', 'k']], 1) # find nearest neighbors
df.loc[:, ['i_', 'j_', 'k_']] = ijk[points] # write coordinates to df
df.loc[:, 'dist'] = dist # write distance to df; if dist == 0, then i,j,k == i_,j_,k_
lgr.info(f'Finding nearest atlas region for each coordinate. Maximum distance = {df.dist.max():1.2f}.')
# GET REGION ASSOCIATED WITH COORDINATES
regions = []
for _, row in df.iterrows(): # iterate over fNIRS coordinates
idx = a_dat[(row['i_'], row['j_'], row['k_'])] # get atlas index of coordinate
label = labels['label'][labels['idx']==idx].values[0] # get atlas label
regions.append([idx, label]) # save
df.loc[:, ['atlas_idx', 'atlas_region']] = regions # append to fnirs dataframe
# GET FNIRS RESULTS
lgr.info('Extracting parcel-wise results...')
result = []
for region_idx in df['atlas_idx'].unique():
# characterize atlas region anatomically using anatomical atlas, default to AAL
region_info, _ = get_statmap_info(math_img(f'a=={region_idx}', a=atlas), 1, ['aal', 'talairach_ba'], 1)
# get all df rows with coordinate in atlas region
df_region = df[df['atlas_idx']==region_idx]
# get counts
# channels
n_ch_all = len(df_region)
n_ch_sync = df_region['sync'].sum()
# experiments
exp_all = df_region['publication'].unique()
exp_sync = df_region['publication'][df_region['sync']==1].unique()
n_exp_all = len(exp_all)
n_exp_sync = len(exp_sync)
# subjects
n_sub_all = np.nansum([df_region['n'][df_region['publication']==pub].values[0] for pub in exp_all])
n_sub_sync = np.nansum([df_region['n'][df_region['publication']==pub].values[0] for pub in exp_sync])
# get ratios
ch_ratio = n_ch_sync / n_ch_all
ch_ratio_ch_sync = ch_ratio * n_ch_sync
ch_ratio_sub_all = ch_ratio * n_sub_all
ch_ratio_exp_all = ch_ratio * n_exp_all
# save result
r = dict(
region = labels['label'][labels['idx']==region_idx].values[0],
region_idx = region_idx,
n_ch_all = n_ch_all,
n_ch_sync = n_ch_sync,
n_exp_all = n_exp_all,
n_exp_sync = n_exp_sync,
n_sub_all = n_sub_all,
n_sub_sync = n_sub_sync,
ch_ratio = ch_ratio,
ch_ratio_ch_sync = ch_ratio_ch_sync,
ch_ratio_sub_all = ch_ratio_sub_all,
ch_ratio_exp_all = ch_ratio_exp_all,
exp_all = ', '.join(exp_all),
exp_sync = ', '.join(exp_sync),
AAL = region_info['aal'][0],
BA = region_info['talairach_ba'][0]
)
# append to list
result.append(r)
# convert to dataframe, sort, save
result_df = pd.DataFrame(result)
result_df.sort_values('ch_ratio_sub_all', ascending=False, inplace=True)
result_df.reset_index(drop=True)
# NEW VOLUMES
if volumes:
lgr.info('Creating volumes...')
# new data matrices
dat_sync, dat_ratio_sub, dat_ratio_exp = np.zeros(a_dat.shape), np.zeros(a_dat.shape), np.zeros(a_dat.shape) # empty data matrices
for _, row in result_df.iterrows():
s = row['n_ch_sync']
if s > 0:
dat_sync[a_dat==row['region_idx']] = s # number of significant channels
dat_ratio_sub[a_dat==row['region_idx']] = row['ch_ratio_sub_all'] # ratio of sig to all channels
dat_ratio_exp[a_dat==row['region_idx']] = row['ch_ratio_exp_all'] # ratio of sig to all channels weighted by total subjects
else: # fill parcels with -1 to indicate covered areas
dat_sync[a_dat==row['region_idx']] = -1
dat_ratio_sub[a_dat==row['region_idx']] = -1
dat_ratio_exp[a_dat==row['region_idx']] = -1
# create dict with volumes
vols = {'n_ch_sync': new_img_like(atlas, dat_sync),
'n_ch_ratio_sub_all': new_img_like(atlas, dat_ratio_sub),
'n_ch_ratio_exp_all': new_img_like(atlas, dat_ratio_exp)}
return result_df, vols, df
else:
return result_df, df
#=============================================================================
def fnirs_permute(fnirs_coord_df, region_indices, n_perm, seed=None, verbose=True):
"""
Calculated permutation-based p-values for the fnirs result as estimated in
the function above.
Input:
fnirs_coord_df = third output table form fnirs_result function
region_indices = region_indices to look at, must match {region_idx} in
fnirs_coord_df
n_perm = number of permutations
Output:
Three dictionaries with (1) permutated data, (2) p values, (3) q values,
in each dict there are three arrays stored for (1) number of sync channels,
(2) sync/all channel ratio weighted by subject number, and (3) weighted by
experiment number.
"""
# function to extract results ---------------------------------------------------------------------------------------
def get_fnirs_indices(df, region_idc):
# empty list to store results
ch_sync, ch_ratio_sub_all, ch_ratio_exp_all = [], [], []
# collect parcel-wise data (iterating parcels in the order of the original dataframe)
for region_idx in region_idc:
# all df rows with coordinate in atlas region
df_region = df[df['atlas_idx']==region_idx]
# get relevant counts
# channels
n_ch_all = len(df_region)
n_ch_sync = df_region['sync'].sum()
ch_ratio = n_ch_sync / n_ch_all
# experiments
exp_all = df_region['publication'].unique()
#exp_sync = df_region['publication'][df_region['sync']==1].unique()
n_exp_all = len(exp_all)
#n_exp_sync = len(exp_sync)
# subjects
n_sub_all = np.nansum([df_region['n'][df_region['publication']==pub].values[0] for pub in exp_all])
#n_sub_sync = np.nansum([df_region['n'][df_region['publication']==pub].values[0] for pub in exp_sync])
# collect results
ch_sync.append(n_ch_sync)
ch_ratio_sub_all.append(ch_ratio * n_sub_all)
ch_ratio_exp_all.append(ch_ratio * n_exp_all)
return ch_sync, ch_ratio_sub_all, ch_ratio_exp_all
# ---------------------------------------------------------------------------------------
# get real result
real_n_ch_sync, real_ch_ratio_sub_all, real_ch_ratio_exp_all = \
get_fnirs_indices(fnirs_coord_df, region_indices)
# get permuted results
perm_n_ch_sync = np.zeros((len(region_indices), n_perm))
perm_ch_ratio_sub_all = np.zeros((len(region_indices), n_perm))
perm_ch_ratio_exp_all = np.zeros((len(region_indices), n_perm))
np.random.seed(seed=seed)
for i in tqdm(range(n_perm), disable=not verbose):
# copy dataframe
df = fnirs_coord_df.copy(deep=True)
# drop parcel labels (dont fit anymore)
df.drop('atlas_region', axis=1)
# randomize coordinate-parcel-assignment
df['atlas_idx'] = np.random.permutation(df['atlas_idx'].values)
# save results
perm_n_ch_sync[:,i], perm_ch_ratio_sub_all[:,i], perm_ch_ratio_exp_all[:,i] = \
get_fnirs_indices(df, region_indices)
# make dfs
perm_n_ch_sync = pd.DataFrame(data=perm_n_ch_sync, index=region_indices)
perm_ch_ratio_sub_all = pd.DataFrame(data=perm_ch_ratio_sub_all, index=region_indices)
perm_ch_ratio_exp_all = pd.DataFrame(data=perm_ch_ratio_exp_all, index=region_indices)
# get p values
p_n_ch_sync, p_ch_ratio_sub_all, p_ch_ratio_exp_all = [], [], []
for i, region_idx in enumerate(region_indices):
p_n_ch_sync.append(null_to_p(real_n_ch_sync[i], perm_n_ch_sync.loc[region_idx,:], tail='upper'))
p_ch_ratio_sub_all.append(null_to_p(real_ch_ratio_sub_all[i], perm_ch_ratio_sub_all.loc[region_idx,:], tail='upper'))
p_ch_ratio_exp_all.append(null_to_p(real_ch_ratio_exp_all[i], perm_ch_ratio_exp_all.loc[region_idx,:], tail='upper'))
# fdr correction
_, q_n_ch_sync = fdrcorrection(p_n_ch_sync)
_, q_ch_ratio_sub_all = fdrcorrection(p_ch_ratio_sub_all)
_, q_ch_ratio_exp_all = fdrcorrection(p_ch_ratio_exp_all)
return(
dict(
perm_n_ch_sync = perm_n_ch_sync,
perm_ch_ratio_sub_all = perm_ch_ratio_sub_all,
perm_ch_ratio_exp_all = perm_ch_ratio_exp_all
),
dict(
p_n_ch_sync = p_n_ch_sync,
p_ch_ratio_sub_all = p_ch_ratio_sub_all,
p_ch_ratio_exp_all = p_ch_ratio_exp_all
),
dict(
q_n_ch_sync = q_n_ch_sync,
q_ch_ratio_sub_all = q_ch_ratio_sub_all,
q_ch_ratio_exp_all = q_ch_ratio_exp_all
),
)
#=============================================================================
def rand_fnirs_coords(fnirs_df, mask, publications=None, radius=10, seed=None, verbose=True):
"""
Takes the fnirs dataframe {fnirs_df} and randomizes coordinates of studies defined by {publications}
within a sphere with a radius of {radius} * voxelsize mm. If ids is None, all coordinates are
randomized. Df and mask must be in the same space!
"""
# copy dataset to leave the input original
fnirs_df = fnirs_df.copy(deep=True)
# set seed
np.random.seed(seed=seed)
if verbose: lgr.info(f'Randomizing coordinates within a sphere of {radius} mm.')
# get possible coordinates to sample from
m = load_img(mask)
m_dat = m.get_fdata()
# all possible coordinates
ijk_all = np.argwhere(m_dat > 0) # array with all non zero mask coordinates
kdtree = KDTree(ijk_all) # prepare KDTree class
# get MNI coordinates
xyz = fnirs_df[['x', 'y', 'z']]
xyz_na = xyz["x"].isnull().to_numpy()
# convert to world coordinates
ijk = mm2vox(xyz.iloc[~xyz_na,:], m.affine)
# get all coordinates in sphere around coordinate
_, points = kdtree.query(ijk, k=None, distance_upper_bound=radius)
# loop over coordinates
xyz_ = list()
for p in range(len(points)):
# get random coordinate
rand_coord = ijk_all[np.random.choice(points[p])]
# convert back to mni coordinate
xyz_.append(vox2mm(rand_coord, m.affine))
# write back to ds
fnirs_df.loc[~xyz_na, ['x', 'y', 'z']] = xyz_
return(fnirs_df)
#=============================================================================
def rand_nimare_coords(ds, mask, ids=None, radius=10, seed=None, verbose=True):
"""
Takes a dataset {ds} and randomizes coordinates of studies defined by {ids}
within a sphere with a radius of {radius} * voxelsize mm. If ids is None, all coordinates are
randomized. Dataset and mask must be in the same space!
"""
# copy dataset to leave the input original
ds = ds.copy()
# set seed
np.random.seed(seed=seed)
# get study ids
if ids is None:
ids = ds.ids
if verbose: lgr.info(f'Randomizing coordinates within a sphere of {radius} mm.')
# get possible coordinates to sample from
m = load_img(mask)
m_dat = m.get_fdata()
# all possible coordinates
ijk_all = np.argwhere(m_dat > 0) # array with all non zero mask coordinates
kdtree = KDTree(ijk_all) # prepare KDTree class
# iterate over ids
for id in ids:
if verbose: lgr.info(f'Randomizing coordinates from {id}.')
# get MNI coordinates
xyz = ds.coordinates[['x', 'y', 'z']][ds.coordinates.id==id]
# convert to world coordinates
ijk = mm2vox(xyz, m.affine)
# get all coordinates in sphere around coordinate
_, points = kdtree.query(ijk, k=None, distance_upper_bound=radius)
# loop over coordinates
xyz_ = list()
for p in range(len(points)):
# get random coordinate
rand_coord = ijk_all[np.random.choice(points[p])]
# convert back to mni coordinate
xyz_.append(vox2mm(rand_coord, m.affine))
# write back to ds
ds.coordinates.loc[ds.coordinates.id==id, ['x', 'y', 'z']] = xyz_
return(ds)