-
Notifications
You must be signed in to change notification settings - Fork 1
/
Chapter_4.lyx
8603 lines (6322 loc) · 175 KB
/
Chapter_4.lyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#LyX 2.0 created this file. For more info see http://www.lyx.org/
\lyxformat 413
\begin_document
\begin_header
\textclass article
\begin_preamble
\usepackage{amsmath}
%\newcommand\argmin{\operatornamewithlimits{arg\,min}}
%\DeclareMathOperator{\argmin}{arg\,min}
\def\argmin{\mathop{\operator@font arg\,min}}
\def\argmax{\mathop{\operator@font arg\,max}}
%\DeclareMathOperator{\argmax}{arg\,max}
\newcommand{\gini}{\mathtt{gini}}
\newcommand{\rce}{\mathtt{rce}}
\usepackage{siunitx}
\end_preamble
\use_default_options true
\maintain_unincluded_children false
\language english
\language_package default
\inputencoding auto
\fontencoding global
\font_roman palatino
\font_sans default
\font_typewriter default
\font_default_family default
\use_non_tex_fonts false
\font_sc false
\font_osf false
\font_sf_scale 100
\font_tt_scale 100
\graphics default
\default_output_format default
\output_sync 0
\bibtex_command default
\index_command default
\paperfontsize 12
\spacing onehalf
\use_hyperref false
\papersize a4paper
\use_geometry true
\use_amsmath 1
\use_esint 1
\use_mhchem 1
\use_mathdots 1
\cite_engine basic
\use_bibtopic false
\use_indices false
\paperorientation portrait
\suppress_date false
\use_refstyle 0
\index Index
\shortcut idx
\color #008000
\end_index
\leftmargin 20page%
\topmargin 15page%
\rightmargin 15page%
\bottommargin 15page%
\secnumdepth 3
\tocdepth 3
\paragraph_separation skip
\defskip bigskip
\quotes_language english
\papercolumns 1
\papersides 1
\paperpagestyle default
\tracking_changes false
\output_changes false
\html_math_output 0
\html_css_as_file 0
\html_be_strict false
\author 1 ""
\author 3 "Eleftherios Garyfallidis,,,"
\end_header
\begin_body
\begin_layout Section
Highly Efficient
\begin_inset Newline newline
\end_inset
Tractography Clustering
\begin_inset CommandInset label
LatexCommand label
name "sec:Highly-Efficient-Tractography"
\end_inset
\end_layout
\begin_layout Subsection
Overview
\end_layout
\begin_layout Standard
Current tractography propagation algorithms can generate very large tractographi
es which are difficult to interpret and visualize.
A clustering of some kind seems to be a solution to simplify the complexity
of these datasets and provide a useful segmentation; however most proposed
clustering algorithms are very slow and often need to calculate pairwise
distances of size
\begin_inset Formula $N\times N$
\end_inset
where
\begin_inset Formula $N$
\end_inset
is the number of tracks.
This amount of comparisons adds a heavy load on clustering algorithms forcing
them to be inefficient and therefore impractical for everyday analysis
as it is difficult to compute all these distances or even store them in
memory.
This adds a further overhead to the use of tractography for clinical applicatio
ns but also introduces a barrier on understanding and interpreting the quality
of diffusion data sets.
We show in this chapter that a stable, on average linear time clustering
algorithm exists.
We call this algorithm QuickBundles (QB).
QB can be used to generate meaningful clusters in seconds with minimum
memory consumption.
In our approach we do not need to calculate all pairwise distances unlike
most of the other existing methods.
Furthermore, we can update our clustering online or in parallel.
We show that we can generate meaningful clusters of the order of
\begin_inset Formula $1,000$
\end_inset
times faster than any other available method and that it can be used to
segment from a few hundred to many millions of tracks.
Moreover our method is multi-purpose; its results can either stand on their
own to explore the neuroanatomy directly, or the clustering technique can
be used as a precursor tool which reduces the dimensionality of the data,
which can then be used as an input to other algorithms of higher order
complexity, resulting in their greater efficiency.
Beyond the use of this algorithm to simplify tractographies, we show here
how it can help identify landmarks, create atlases, and compare and register
tractographies.
\end_layout
\begin_layout Subsection
Track distances and preprocessing
\begin_inset CommandInset label
LatexCommand label
name "sub:track-distances"
\end_inset
\end_layout
\begin_layout Standard
For clarity we first give brief details of various metrics for distances
between tracks as they are integral to an understanding of the track clustering
literature.
Numerous distance metrics between two trajectories have been proposed in
the literature, such as in
\begin_inset CommandInset citation
LatexCommand cite
key "Ding2003"
\end_inset
,
\begin_inset CommandInset citation
LatexCommand cite
key "MaddahIPMI2007"
\end_inset
,
\begin_inset CommandInset citation
LatexCommand cite
key "zhang2005dti"
\end_inset
with the most common being the Hausdorff distance found in
\begin_inset CommandInset citation
LatexCommand cite
key "corouge2004towards"
\end_inset
and many other studies.
We mainly use a very simple symmetric distance proposed in
\begin_inset CommandInset citation
LatexCommand cite
key "EGMB10"
\end_inset
and
\begin_inset CommandInset citation
LatexCommand cite
key "Visser2010"
\end_inset
which we call Minimum average Direct-Flip
\begin_inset Formula $\textrm{MDF}(s_{A},s_{B})$
\end_inset
distance between track
\begin_inset Formula $s_{A}$
\end_inset
and track
\begin_inset Formula $s_{b}$
\end_inset
(see Eq.
\begin_inset space ~
\end_inset
\begin_inset CommandInset ref
LatexCommand ref
reference "eq:direct_flip_distance"
\end_inset
).
This distance can be applied only when both tracks have the same number
of points.
Therefore, we assume that an initial downsampling of tracks has been implemente
d, where all segments on a track have the same length, and all tracks have
the same number of segments.
Under that assumption MDF is defined as:
\family roman
\series medium
\shape up
\size normal
\emph off
\bar no
\noun off
\color none
\begin_inset Formula
\begin{eqnarray}
\textrm{MDF}(s_{A},s_{B}) & = & \min(d_{\texttt{direct}},d_{\textrm{\texttt{flipped}}}),\,\textrm{where}\label{eq:direct_flip_distance}\\
d_{\textrm{\texttt{direct}}}(s_{A},s_{B}) & = & \frac{1}{K}\sum_{i=1}^{K}||\mathbf{x}_{i}^{A}-\mathbf{x}_{i}^{B}||_{2}\,\textrm{and}\nonumber \\
d_{\texttt{flipped}}(s_{A},s_{B}) & = & \frac{1}{K}\sum_{i=1}^{K}||\mathbf{x}_{i}^{A}-\mathbf{x}_{K-i}^{B}||_{2}\nonumber
\end{eqnarray}
\end_inset
\family default
\series default
\shape default
\size default
\emph default
\bar default
\noun default
\color inherit
\begin_inset ERT
status open
\begin_layout Plain Layout
\backslash
noindent
\end_layout
\end_inset
where
\begin_inset Formula $K$
\end_inset
is the number of points
\begin_inset Formula $\mathbf{x}_{i}$
\end_inset
on the two tracks
\begin_inset Formula $A$
\end_inset
and
\begin_inset Formula $B$
\end_inset
.
\end_layout
\begin_layout Standard
In some cases it is still valid to use a family of Hausdorff distances which
for simplicity we denote as MAM distances
\begin_inset ERT
status open
\begin_layout Plain Layout
--
\end_layout
\end_inset
short for Minimum, or Maximum, or Mean, Average Minimum distance (MAM).
We mostly use the Mean version of this family, (see Eq.
\begin_inset space ~
\end_inset
\begin_inset CommandInset ref
LatexCommand ref
reference "eq:mean_average_distance"
\end_inset
) but the others are potentially useful as they can weight different properties
of the tracks.
These distances are slower to compute than MDF but they can work with different
number of segments on tracks; a property that is useful for some applications.
The equations below show the formulation of these distances:
\begin_inset Formula
\begin{eqnarray}
d_{\textrm{avg}}(s_{A},s_{B}) & = & \frac{1}{K_{A}}\sum_{i=1}^{K_{A}}d(x_{i}^{A},s_{B}),\nonumber \\
d_{\textrm{min}}(s_{A},s_{B}) & = & \min_{j=1,...,K_{B}}d(\mathbf{x}_{i}^{A},s_{B}),\,\textrm{and}\label{eq:mininum_distance}\\
d_{\textrm{max}}(s_{A},s_{B}) & = & \max_{j=1,...,K_{B}}d(\mathbf{x}_{i}^{A},s_{B})\,\textrm{where}\label{eq:maximum distance}\\
d(\mathbf{x},s_{B}) & = & \min_{j=1,...,K_{B}}||\mathbf{x}-\mathbf{x}_{j}^{B}||_{2}.\nonumber \\
\textrm{MAM}_{\textrm{min}}(s_{A},s_{B}) & = & \min(d_{\textrm{avg}}(s_{A},s_{B}),d_{\textrm{avg}}(s_{B},s_{A}))\label{eq:min_average_distance}\\
\textrm{MAM}_{\textrm{max}}(s_{A},s_{B}) & = & \max(d_{\textrm{avg}}(s_{A},s_{B}),d_{\textrm{avg}}(s_{B},s_{A}))\nonumber \\
\textrm{MAM}_{\textrm{avg}}(s_{A},s_{B}) & = & (d_{\textrm{avg}}(s_{A},s_{B})+d_{\textrm{avg}}(s_{B},s_{A}))/2\label{eq:mean_average_distance}
\end{eqnarray}
\end_inset
\end_layout
\begin_layout Standard
\family roman
\series medium
\shape up
\size normal
\emph off
\bar no
\noun off
\color none
\begin_inset ERT
status open
\begin_layout Plain Layout
\backslash
noindent
\end_layout
\end_inset
where the number of points
\begin_inset Formula $K_{A}$
\end_inset
and
\begin_inset Formula $K_{B}$
\end_inset
on the two tracks are not necessarily the same.
For the same threshold value
\begin_inset Formula $\textrm{MAM}_{\textrm{min}}$
\end_inset
,
\lang british
\begin_inset Formula $\textrm{MAM}_{\textrm{max}}$
\end_inset
and
\begin_inset Formula $\textrm{MAM}_{\textrm{avg}}$
\end_inset
will give different results.
For example,
\lang english
\begin_inset Formula $\textrm{MAM}_{\textrm{min}}$
\end_inset
will bring together more short tracks with long tracks than
\begin_inset Formula $\textrm{MAM}_{\textrm{max}}$
\end_inset
and
\lang british
\begin_inset Formula $\textrm{MAM}_{\textrm{avg}}$
\end_inset
\lang english
will have an in between effect.
Finally, other distances than the average minimum based on the minimum
(see Eq.
\begin_inset space ~
\end_inset
\begin_inset CommandInset ref
LatexCommand ref
reference "eq:mininum_distance"
\end_inset
) or maximum distance (see Eq.
\begin_inset space ~
\end_inset
\begin_inset CommandInset ref
LatexCommand ref
reference "eq:maximum distance"
\end_inset
) can be used.
However, we have not investigated them in this thesis.
\end_layout
\begin_layout Standard
\begin_inset Float figure
wide false
sideways false
status open
\begin_layout Plain Layout
\begin_inset ERT
status open
\begin_layout Plain Layout
[th!]
\end_layout
\end_inset
\end_layout
\begin_layout Plain Layout
\noindent
\align center
\begin_inset Graphics
filename QB/Thesis/Fig_2_distances2.png
lyxscale 20
scale 60
rotateOrigin center
\end_inset
\begin_inset Caption
\begin_layout Plain Layout
Distances used in this work.
The main distance used is minimum average direct flip (MDF) distance
\begin_inset Formula $\textrm{MDF}=\min(d_{\textrm{\texttt{direct}}},d_{\texttt{flipped}})$
\end_inset
which is a symmetric distance that can deal with the track bi-directionality
problem and works on tracks which have the same number of points.
Another distance is the mean average distance which is again symmetric
but does not need for the tracks to have the same number of points
\begin_inset Formula $\textrm{MAM}_{\textrm{avg}}=(d_{avg}(s_{A},s_{B})+d_{avg}(s_{B},s_{A}))/2$
\end_inset
.
The components of both distances are shown; with solid lines we draw the
tracks, and then with dashed lines we connect the pairs of points of the
two tracks whose distances contribute to the overall metrics.
\end_layout
\end_inset
\begin_inset CommandInset label
LatexCommand label
name "Flo:Distances_used"
\end_inset
\end_layout
\end_inset
\end_layout
\begin_layout Standard
The main advantages of the MDF distance (see Eq.
\begin_inset space ~
\end_inset
\begin_inset CommandInset ref
LatexCommand ref
reference "eq:direct_flip_distance"
\end_inset
), are that it is fast to compute, it takes account of track direction issues
through consideration of both direct and flipped tracks, and that it is
easy to understand how it will behave, from the simplest case of parallel
equi-length tracks to the most complicated of very divergent tracks.
Another advantage is that it will separate short tracks from long tracks;
a track A that is half the length of track B will be relatively poorly
matched on MDF to B.
We will see later in this chapter that this helps to find broken or erroneous
tracks.
An asset of having tracks with the same number of points is that we can
easily do pairwise calculations on them; for example add two or more tracks
together to create a new average track.
We will see in the next section that track addition is a key property of
our clustering algorithm.
Some care should be taken into consideration with the number of points
allowed in a track (track downsampling).
We always keep the endpoints intact and then downsample in equidistant
segments.
This means that short tracks will have the same number of points as long
tracks.
Therefore, the curvature from the long tracks will be lost relative to
the short tracks i.e.
the short tracks will have higher resolution.
We found empirically that this is not an important issue and that for clusterin
g purposes even downsampling to only
\begin_inset Formula $3$
\end_inset
points in total could be useful
\begin_inset CommandInset citation
LatexCommand cite
key "EGMB10"
\end_inset
.
Depending on the application less or more points can be used.
\begin_inset Note Note
status collapsed
\begin_layout Plain Layout
MATTHEW: Discussion here of the advantages and disadvantages of different
numbers of points allowed in tracks? For example, short tracks having the
same number of points as long tracks means that more of the curvature etc
data from the long tracks will be lost relative to the short tracks - I
suppose.
ELEF: I will work on it shortly.
\end_layout
\end_inset
\end_layout
\begin_layout Subsection
Related Work
\end_layout
\begin_layout Standard
During the last
\begin_inset Formula $10$
\end_inset
years there have been numerous efforts from many researchers to address
the unsupervised and supervised learning problems of brain tractography.
As far as we know all these methods suffer from low efficiency, however
they provide many useful ideas which we describe in this section.
\begin_inset Note Note
status collapsed
\begin_layout Plain Layout
MATTHEW: I got a bit lost in this section.
Is there a way of summarizing the papers under different themes such as
distance metric used, cluster number finding, clustering method or something
like that? IAN: I am planning to review the structure of this section.
ELEF: I made some changes.
I hope it looks better now.
\end_layout
\end_inset
\end_layout
\begin_layout Standard
Tractography clustering algorithms are rarely compared in the literature.
Nonetheless, Moberts et al.
\begin_inset space ~
\end_inset
\begin_inset CommandInset citation
LatexCommand cite
key "moberts2005evaluation"
\end_inset
are an exception.
They evaluated different popular hierarchical clustering methods including
a less common one, shared nearest neighbor (SNN), against a gold standard
segmentation by physicians.
The authors concluded that single-link clustering with mean average distance
was the method which performed best.
Wang et al.
\begin_inset space ~
\end_inset
\begin_inset CommandInset citation
LatexCommand cite
key "wang2010tractography"
\end_inset
proposed a nonparametric Bayesian framework using hierarchical Dirichlet
processes mixture model (HDPM).
This is one of the very few methods not based on distances.
In this work a track is modeled as a discrete distribution over a codebook
of discretized orientations and voxel regions.
The authors explain that calculating pairwise distances is very time consuming
and therefore they avoid using them.
Their approach automatically learns the number of clusters from data with
Dirichlet processes priors but it is still not efficient enough for real
time operation.
A disadvantage of this method is that the priors do not originate from
anatomical knowledge.
\end_layout
\begin_layout Standard
Visser et al.
\begin_inset space ~
\end_inset
\begin_inset CommandInset citation
LatexCommand cite
key "Visser2010"
\end_inset
used hierarchical clustering and fuzzy c-means together with recombination
of subsets of the same tractography to reduce the effect of the large datasets
on the distance matrix based on the MDF distance (see section
\begin_inset space ~
\end_inset
\begin_inset CommandInset ref
LatexCommand ref
reference "sub:track-distances"
\end_inset
)
\begin_inset CommandInset citation
LatexCommand cite
key "EGMB10"
\end_inset
.
An interesting result with this method was that they could automatically
find the different sub-bundles of the Arcuate Fasciculus region in accordance
with the supervised labeling described in
\begin_inset CommandInset citation
LatexCommand cite
key "catani2005perisylvian"
\end_inset
.
The algorithm that we present in this chapter also uses the minimum average
flip (MDF) metric as a measure of distance between tracks.
Gerig et al.
\begin_inset space ~
\end_inset
\begin_inset CommandInset citation
LatexCommand cite
key "gerig2004analysis"
\end_inset
also used hierarchical clustering with a symmetrised version of closest
point distances,
\begin_inset Formula $\mathrm{MA}\mathrm{M}_{\mathrm{avg}}$
\end_inset
and
\begin_inset Formula $\mathrm{MA}\mathrm{M}_{\mathrm{max}}$
\end_inset
(Hausdorff).
However, they tested their method with only two bundles: Uncinate Fasciculus
and the Corticospinal Tract.
\end_layout
\begin_layout Standard
Guevara et al.
\begin_inset space ~
\end_inset
\begin_inset CommandInset citation
LatexCommand cite
key "Guevara2010"
\end_inset
combined a great number of different algorithms from hierarchical clustering
to 3D watershed on track extremities.
They first divided the tractography into left-right hemisphere, inter-hemispher
ic and cerebellum subsets.
They then created further subsets of different track length, used hierarchical
clustering based on the random voxel par- cels, used watershed over extremities
and finally used hierarchical clustering to merge the different sub-bundles
using the Hausdorff distance (see section
\begin_inset CommandInset ref
LatexCommand ref
reference "sub:track-distances"
\end_inset
).
This work stressed the need to divide the data set between shorter and
longer tracks.
Tsai et al.
\begin_inset space ~
\end_inset
\begin_inset CommandInset citation
LatexCommand cite
key "Tsai2007"
\end_inset
used a combination of cluster methods based on minimum spanning trees,
locally linear embedding and k-means.
They were able to incorporate both local and global structures by changing
a few parameters.
The main advantage of this method was that it showed a way to merge a chain
of neighbouring structures into one cluster.
Zhang and Laidlaw
\begin_inset CommandInset citation
LatexCommand cite
key "zhang2005dti"
\end_inset
used an agglomerative hierarchical clustering using the same distance as
in
\begin_inset CommandInset citation
LatexCommand cite
key "zhang2003visualizing"
\end_inset
and later in
\begin_inset CommandInset citation
LatexCommand cite
key "zhang2008identifying"
\end_inset
combined distance-based single linkage hierarchical clustering with expert
labeling of specific bundles.
Zvitia et al.
\begin_inset space ~
\end_inset
\begin_inset CommandInset citation
LatexCommand cite
key "zvitia2008adaptive"
\end_inset
,
\begin_inset CommandInset citation
LatexCommand cite
key "Zvitia2010"
\end_inset
used adaptive mean shift.
This is a clustering algorithm which finds automatically the number of
clusters.
This is in contrast for example with k-means that the user needs to prespecify
the number of clusters.
They also used this approach for direct registration of tractographies
but only with tractographies from the same subject.
El Kouby et al.
\begin_inset space ~
\end_inset
\begin_inset CommandInset citation
LatexCommand cite
key "ElKouby2005"
\end_inset
created a ROI-based connectivity matrix where the
\begin_inset Formula $i,j$
\end_inset
th entry of the matrix holds the number of tracks which connect
\begin_inset Formula $ROI_{i}$
\end_inset
to
\begin_inset Formula $ROI_{j}$
\end_inset
.
K-means was used afterwards on the rows of the matrix to cluster the tracks.
This technique can be used for clustering bundles across subjects.
\end_layout
\begin_layout Standard
Brun et al.
\begin_inset space ~
\end_inset
\begin_inset CommandInset citation
LatexCommand cite
key "brun2004clustering"
\end_inset
used the mean and covariance of the track as the feature space and normalized
cuts based on a graph theoretic approach for the segmentation.
Ding et al.
\begin_inset space ~
\end_inset
\begin_inset CommandInset citation
LatexCommand cite
key "Ding2003a"
\end_inset
used k-nearest neighbours, another agglomerative approach, applied to correspon
ding track segments.
Corouge et al.
\begin_inset space ~
\end_inset
\begin_inset CommandInset citation
LatexCommand cite
key "corouge2004towards"
\end_inset
used different types of track distances, e.g.
Hausdorff distances, and other geometric properties such as torsion and
curvature, and in
\begin_inset CommandInset citation
LatexCommand cite
key "Corouge2004"
\end_inset
and
\begin_inset CommandInset citation
LatexCommand cite
key "Corouge2006"
\end_inset
used Generalized Procrustes Analysis and Principal Components Analysis
(PCA) to analyze the shape of bundles.
\end_layout
\begin_layout Standard
O'Donnell et al.
\begin_inset space ~
\end_inset
\begin_inset CommandInset citation
LatexCommand cite
key "ODonnell_IEEETMI07"
\end_inset
created a tractographic atlas using spectral embedding and expert anatomical
labeling.
They then automatically segmented using spectral clustering and expressed
the tracks as points in the embedded space to the closest existing atlas
clusters.
The full affinity matrix was too big to compute, therefore they used the
Nystrom approximation: working on a subset and avoid generating the complete
affinity/distance matrix.
Later in
\begin_inset CommandInset citation
LatexCommand cite
key "o2009tract"
\end_inset
they tried group analysis on prespecified bundles.
\end_layout
\begin_layout Standard
Maddah et al.
\begin_inset space ~
\end_inset
\begin_inset CommandInset citation
LatexCommand cite
key "Maddah_MICCA2005"
\end_inset
used B-spline representations of tracks referenced to an atlas, and then
the tracks were clustered based on the labeled atlas.
Later Maddah et al.
\begin_inset space ~
\end_inset
\begin_inset CommandInset citation
LatexCommand cite
key "maddah2006statistical"
\end_inset
using a similar track representation (quintic B-splines) calculated a model
for each bundle as the average and standard deviation of that parametric
representation.
In that way they created an atlas which is used as a prior for expectation
maximization (EM) clustering of the Corpus Callosum tracks into Witelson
subdivisions
\begin_inset CommandInset citation
LatexCommand cite
key "witelson1989hand"
\end_inset
using population averages.
Later in
\begin_inset CommandInset citation
LatexCommand cite
key "Maddah_IEEEBI2008"
\end_inset
Maddah et al.
\begin_inset space ~
\end_inset
it is showed that it is possible to combine spatial priors with metrics
for the shape of the tracks in order to guide the clustering process.
\end_layout
\begin_layout Standard
Jonasson et al.
\begin_inset space ~
\end_inset
\begin_inset CommandInset citation
LatexCommand cite
key "jonasson2005fiber"
\end_inset
created a large
\begin_inset Formula $N\times N$
\end_inset
co-occurrence matrix, where
\begin_inset Formula $N$
\end_inset
is the number of the fibers to cluster.
The co-occurrence (affinity) matrix contained the number of times that