-
Notifications
You must be signed in to change notification settings - Fork 0
/
refs.bib
677 lines (610 loc) · 28.2 KB
/
refs.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
% The Fourth Paradigm (edited volume). Same work as hey2009fourth; both keys
% are kept so that existing citations continue to resolve.
@book{hey09,
  editor    = {Hey, Tony and Tansley, Stewart and Tolle, Kristin},
  title     = {The Fourth Paradigm: Data-Intensive Scientific Discovery},
  publisher = {Microsoft Research},
  address   = {Redmond, WA},
  year      = {2009}
}
% Bechhofer et al.: research objects vs. plain linked data.
@article{bechhofer2013linked,
  author    = {Bechhofer, Sean and Buchan, Iain and De Roure, David and Missier, Paolo and Ainsworth, John and Bhagat, Jiten and Couch, Philip and Cruickshank, Don and Delderfield, Mark and Dunlop, Ian and Gamble, Matthew and Michaelides, Danius and Owen, Stuart and Newman, David and Sufi, Shoaib and Goble, Carole},
  title     = {Why linked data is not enough for scientists},
  journal   = {Future Generation Computer Systems},
  volume    = {29},
  number    = {2},
  pages     = {599--611},
  year      = {2013},
  publisher = {Elsevier}
}
% bioRxiv preprint; `pages` carries the bioRxiv article number, which is also
% the DOI suffix.
@article{madduri2018reproducible,
  author    = {Madduri, Ravi K and Chard, Kyle and D'Arcy, Mike and Jung, Segun C and Rodriguez, Alexis and Sulakhe, Dinanath and Deutsch, Eric W and Funk, Cory and Heavner, Ben and Richards, Matthew and others},
  title     = {Reproducible big data science: A case study in continuous {FAIRness}},
  journal   = {bioRxiv},
  pages     = {268755},
  year      = {2018},
  publisher = {Cold Spring Harbor Laboratory},
  doi       = {10.1101/268755}
}
% Globus Auth conference paper. The venue goes in `booktitle` (required for
% inproceedings); the edition number matches the schuler16 entry for the same
% 2016 e-Science conference.
@inproceedings{GlobusAuth2016,
  author    = {Ananthakrishnan, Rachana and Chard, Kyle and Foster, Ian and Lidman, Mattias and McCollam, Brendan and Rosen, Stephen and Tuecke, Steven},
  title     = {{Globus Auth}: A Research Identity and Access Management Platform},
  booktitle = {12th {IEEE} International Conference on e-Science},
  pages     = {203--212},
  year      = {2016},
  publisher = {IEEE}
}
% Figshare dataset accompanying the funk18 preprint (same author list).
@misc{TFBSdata,
  author = {Funk, Cory C and Jung, Segun and Richards, Matthew A and Rodriguez, Alex and Shannon, Paul and Donovan, Rory and Heavner, Ben and Chard, Kyle and Xiao, Yukai and Glusman, Gustavo and Ertekin-Taner, Nilufer and Golde, Todd and Toga, Arthur and Hood, Leroy and Van Horn, John D and Kesselman, Carl and Foster, Ian and Ament, Seth and Madduri, Ravi and Price, Nathan D},
  title  = {Data for transcription factor binding site atlas paper},
  year   = {2018},
  doi    = {10.6084/m9.figshare.5924077},
  url    = {https://doi.org/10.6084/m9.figshare.5924077}
}
% JASPAR 2016 database update (Nucleic Acids Research database issue).
@article{mathelier2015jaspar,
  author    = {Mathelier, Anthony and Fornes, Oriol and Arenillas, David J and Chen, Chih-yu and Denay, Gr{\'e}goire and Lee, Jessica and Shi, Wenqiang and Shyr, Casper and Tan, Ge and Worsley-Hunt, Rebecca and others},
  title     = {{JASPAR 2016}: A major expansion and update of the open-access database of transcription factor binding profiles},
  journal   = {Nucleic Acids Research},
  volume    = {44},
  number    = {D1},
  pages     = {D110--D115},
  year      = {2015},
  publisher = {Oxford University Press}
}
% SwissRegulon regulatory-site database paper.
@article{pachkov2006swissregulon,
  author    = {Pachkov, Mikhail and Erb, Ionas and Molina, Nacho and Van Nimwegen, Erik},
  title     = {Swiss{R}egulon: A database of genome-wide annotations of regulatory sites},
  journal   = {Nucleic Acids Research},
  volume    = {35},
  number    = {suppl\_1},
  pages     = {D127--D131},
  year      = {2006},
  publisher = {Oxford University Press}
}
% UniPROBE 2015 update. The acronym DNA is braced so sentence-casing styles
% do not lowercase it.
@article{hume2014uniprobe,
  author    = {Hume, Maxwell A and Barrera, Luis A and Gisselbrecht, Stephen S and Bulyk, Martha L},
  title     = {{UniPROBE}, update 2015: New tools and content for the online database of protein-binding microarray data on protein--{DNA} interactions},
  journal   = {Nucleic Acids Research},
  volume    = {43},
  number    = {D1},
  pages     = {D117--D122},
  year      = {2014},
  publisher = {Oxford University Press}
}
% HOCOMOCO transcription factor binding model collection.
@article{kulakovskiy2015hocomoco,
  author    = {Kulakovskiy, Ivan V and Vorontsov, Ilya E and Yevshin, Ivan S and Soboleva, Anastasiia V and Kasianov, Artem S and Ashoor, Haitham and Ba-Alawi, Wail and Bajic, Vladimir B and Medvedeva, Yulia A and Kolpakov, Fedor A and others},
  title     = {{HOCOMOCO}: Expansion and enhancement of the collection of transcription factor binding sites models},
  journal   = {Nucleic Acids Research},
  volume    = {44},
  number    = {D1},
  pages     = {D116--D125},
  year      = {2015},
  publisher = {Oxford University Press}
}
% Bonazzi and Bourne perspective article. The citation key keeps its original
% spelling so existing citations continue to resolve.
@article{bonnazi17,
  author  = {Bonazzi, Vivien R. and Bourne, Philip E.},
  title   = {Should biomedical research be like {Airbnb}?},
  journal = {PLoS Biology},
  volume  = {15},
  pages   = {e2001818},
  year    = {2017}
}
% Claerbout and Karrenbach, SEG 1992. Surnames are spelt as in the
% schwab2000making entry; the citation key is left unchanged on purpose.
@inproceedings{claerbou1992electronic,
  author    = {Claerbout, Jon F and Karrenbach, Martin},
  title     = {Electronic documents give reproducible research a new meaning},
  booktitle = {Society of Exploration Geophysicists Annual Meeting},
  year      = {1992}
}
% Schwab, Karrenbach and Claerbout on reproducible scientific computations.
@article{schwab2000making,
  author    = {Schwab, Matthias and Karrenbach, Martin and Claerbout, Jon},
  title     = {Making scientific computations reproducible},
  journal   = {Computing in Science \& Engineering},
  volume    = {2},
  number    = {6},
  pages     = {61--67},
  year      = {2000},
  publisher = {IEEE}
}
% Peng, Science perspective on reproducible computational research.
@article{peng2011reproducible,
  author    = {Peng, Roger D},
  title     = {Reproducible research in computational science},
  journal   = {Science},
  volume    = {334},
  number    = {6060},
  pages     = {1226--1227},
  year      = {2011},
  publisher = {American Association for the Advancement of Science}
}
% Singularity container system paper.
@article{kurtzer2017singularity,
  author    = {Kurtzer, Gregory M and Sochat, Vanessa and Bauer, Michael W},
  title     = {Singularity: Scientific containers for mobility of compute},
  journal   = {PLOS ONE},
  volume    = {12},
  number    = {5},
  pages     = {e0177459},
  year      = {2017},
  publisher = {Public Library of Science}
}
% Figshare item on using Docker for reproducible research.
@misc{chamberlain2014using,
  author = {Chamberlain, Ryan and Schommer, Jennifer},
  title  = {Using {D}ocker to support reproducible research},
  year   = {2014},
  url    = {https://doi.org/10.6084/m9.figshare.1101910.v1}
}
% Merkel's Linux Journal article introducing Docker.
@article{merkel2014docker,
  author    = {Merkel, Dirk},
  title     = {Docker: Lightweight {L}inux containers for consistent development and deployment},
  journal   = {Linux Journal},
  volume    = {2014},
  number    = {239},
  pages     = {2},
  year      = {2014},
  publisher = {Belltown Media}
}
% Morin et al., Science policy forum on research source-code availability.
% Given names are stored in full so that styles can abbreviate them correctly.
@article{morin2012shining,
  author    = {Morin, Andrew and Urban, Jennifer and Adams, Paul D and Foster, Ian and Sali, Andrej and Baker, David and Sliz, Piotr},
  title     = {Shining light into black boxes},
  journal   = {Science},
  volume    = {336},
  number    = {6078},
  pages     = {159--160},
  year      = {2012},
  publisher = {American Association for the Advancement of Science}
}
% Mesirov, Science perspective on accessible reproducible research.
@article{mesirov2010accessible,
  author    = {Mesirov, Jill P},
  title     = {Accessible reproducible research},
  journal   = {Science},
  volume    = {327},
  number    = {5964},
  pages     = {415--416},
  year      = {2010},
  publisher = {American Association for the Advancement of Science}
}
% RSEQREP RNA-Seq reporting framework (F1000Research).
@article{jensen2017rseqrep,
  author  = {Jensen, Travis L and Frasketi, Michael and Conway, Kevin and Villarroel, Leigh and Hill, Heather and Krampis, Konstantinos and Goll, Johannes B},
  title   = {{RSEQREP: RNA-Seq Reports}, an open-source cloud-enabled framework for reproducible {RNA-Seq} data processing, analysis, and result reporting},
  journal = {F1000Research},
  volume  = {6},
  year    = {2017}
}
% Analysis Commons paper (Nature Genetics).
@article{brody2017analysis,
  author    = {Brody, Jennifer A and Morrison, Alanna C and Bis, Joshua C and O'Connell, Jeffrey R and Brown, Michael R and Huffman, Jennifer E and Ames, Darren C and Carroll, Andrew and Conomos, Matthew P and Gabriel, Stacey and others},
  title     = {Analysis commons, a team approach to discovery in a big-data environment for genetic epidemiology},
  journal   = {Nature Genetics},
  volume    = {49},
  number    = {11},
  pages     = {1560--1563},
  year      = {2017},
  publisher = {Nature Research}
}
% Grossman et al. on a shared vision for cancer genomic data (NEJM).
@article{grossman2016toward,
  author    = {Grossman, Robert L and Heath, Allison P and Ferretti, Vincent and Varmus, Harold E and Lowy, Douglas R and Kibbe, Warren A and Staudt, Louis M},
  title     = {Toward a shared vision for cancer genomic data},
  journal   = {New England Journal of Medicine},
  volume    = {375},
  number    = {12},
  pages     = {1109--1112},
  year      = {2016},
  publisher = {Mass Medical Soc}
}
% Mattmann, Nature comment on data science.
@article{mattmann2013computing,
  author    = {Mattmann, Chris A},
  title     = {Computing: A vision for data science},
  journal   = {Nature},
  volume    = {493},
  number    = {7433},
  pages     = {473--475},
  year      = {2013},
  publisher = {Nature Research}
}
% Boyle, Nature column on big-data systems for biology.
@article{boyle2013biology,
  author    = {Boyle, John},
  title     = {Biology must develop its own big-data systems},
  journal   = {Nature},
  volume    = {499},
  number    = {7456},
  pages     = {7--8},
  year      = {2013},
  publisher = {Nature Publishing Group}
}
% FAIR Guiding Principles paper; `pages` holds the Scientific Data article number.
@article{wilkinson16,
  author  = {Wilkinson, Mark D. and Dumontier, Michel and Aalbersberg, IJsbrand Jan and Appleton, Gabrielle and Axton, Myles and Baak, Arie and others},
  title   = {The {FAIR} Guiding Principles for scientific data management and stewardship},
  journal = {Scientific Data},
  volume  = {3},
  pages   = {160018},
  year    = {2016}
}
% Toga et al. on big biomedical data (JAMIA). Page range is written out in full.
@article{toga15,
  author  = {Toga, Arthur W and Foster, Ian and Kesselman, Carl and Madduri, Ravi and Chard, Kyle and Deutsch, Eric W and others},
  title   = {Big biomedical data as the key resource for discovery science},
  journal = {Journal of the American Medical Informatics Association},
  volume  = {22},
  number  = {6},
  pages   = {1126--1131},
  year    = {2015}
}
% Ten simple rules for reproducible computational research.
@article{sandve2013ten,
  author    = {Sandve, Geir Kjetil and Nekrutenko, Anton and Taylor, James and Hovig, Eivind},
  title     = {Ten simple rules for reproducible computational research},
  journal   = {PLoS Computational Biology},
  volume    = {9},
  number    = {10},
  pages     = {e1003285},
  year      = {2013},
  publisher = {Public Library of Science}
}
% BDQC bioRxiv preprint; `pages` carries the bioRxiv article number, which is
% also the DOI suffix.
@article{deutsch2018bdqc,
  author    = {Deutsch, Eric and Kramer, Roger and Ames, Joseph and Bauman, Andrew and Campbell, David S and Chard, Kyle and Clark, Kristi and D'Arcy, Mike and Dinov, Ivo and Donovan, Rory and others},
  title     = {{BDQC}: A general-purpose analytics tool for domain-blind validation of Big Data},
  journal   = {bioRxiv},
  pages     = {258822},
  year      = {2018},
  publisher = {Cold Spring Harbor Laboratory},
  doi       = {10.1101/258822}
}
% Big data bags and minimal identifiers (IEEE Big Data 2016).
@inproceedings{chard16,
  author    = {Chard, Kyle and D'Arcy, Mike and Heavner, Ben and Foster, Ian and Kesselman, Carl and Madduri, Ravi and others},
  title     = {I'll take that to go: Big data bags and minimal identifiers for exchange of large, complex datasets},
  booktitle = {{IEEE} International Conference on Big Data},
  pages     = {319--328},
  year      = {2016}
}
% BagIt specification (IETF Internet-Draft), recorded as a report rather than
% a journal article. Authors are listed individually so BibTeX can parse them.
@techreport{boyko16,
  author      = {Boyko, Andy and Kunze, John and Littman, Justin and Madden, Liz and Vargas, Brian},
  title       = {The {BagIt} File Packaging Format ({V0.97})},
  type        = {{IETF} Internet-Draft},
  institution = {Internet Engineering Task Force},
  year        = {2016}
}
% ENCODE integrated encyclopedia paper. The corporate author is double-braced
% so it is treated as a single indivisible name.
@article{encode12,
  author  = {{ENCODE Project Consortium}},
  title   = {An integrated encyclopedia of {DNA} elements in the human genome},
  journal = {Nature},
  volume  = {489},
  number  = {7414},
  pages   = {57--74},
  year    = {2012}
}
% Jupyter Notebooks paper (Electronic Publishing conference).
@inproceedings{kluyver2016jupyter,
  author    = {Kluyver, Thomas and Ragan-Kelley, Benjamin and P{\'e}rez, Fernando and Granger, Brian E and Bussonnier, Matthias and Frederic, Jonathan and Kelley, Kyle and Hamrick, Jessica B and Grout, Jason and Corlay, Sylvain and others},
  title     = {Jupyter {N}otebooks--a publishing format for reproducible computational workflows},
  booktitle = {20th International Conference on Electronic Publishing},
  pages     = {87--90},
  year      = {2016}
}
% TFBS atlas bioRxiv preprint (dataset: TFBSdata). DNase is braced so
% sentence-casing styles do not lowercase it.
@article{funk18,
  author  = {Funk, Cory C and Jung, Segun and Richards, Matthew A and Rodriguez, Alex and Shannon, Paul and Donovan, Rory and Heavner, Ben and Chard, Kyle and Xiao, Yukai and Glusman, Gustavo and Ertekin-Taner, Nilufer and Golde, Todd and Toga, Arthur and Hood, Leroy and Van Horn, John D and Kesselman, Carl and Foster, Ian and Ament, Seth and Madduri, Ravi and Price, Nathan D},
  title   = {Atlas of Transcription Factor Binding Sites from {ENCODE} {DNase} Hypersensitivity Data Across 27 Tissue Types},
  journal = {bioRxiv},
  year    = {2018},
  doi     = {10.1101/252023},
  url     = {https://www.biorxiv.org/content/early/2018/01/27/252023}
}
% Same work as chard2014efficient; both keys are kept so existing citations
% resolve, and the field values agree with that entry.
@article{chard14,
  author    = {Chard, Kyle and Tuecke, Steven and Foster, Ian},
  title     = {Efficient and Secure Transfer, Synchronization, and Sharing of Big Data},
  journal   = {IEEE Cloud Computing},
  volume    = {1},
  number    = {3},
  pages     = {46--55},
  year      = {2014},
  publisher = {IEEE}
}
% Conference paper at e-Science 2016: the venue belongs in `booktitle` of an
% inproceedings entry, not in `journal`.
@inproceedings{schuler16,
  author    = {Schuler, Robert E and Kesselman, Carl and Czajkowski, Karl},
  title     = {Accelerating data-driven discovery with scientific asset management},
  booktitle = {12th {IEEE} International Conference on e-Science},
  year      = {2016},
  publisher = {IEEE}
}
% LONI Pipeline paper. Authors are listed individually, the acronym is
% brace-protected and the page range is written out in full.
@article{rex03,
  author  = {Rex, David E and Ma, Jeffrey Q and Toga, Arthur W},
  title   = {The {LONI} Pipeline Processing Environment},
  journal = {NeuroImage},
  volume  = {19},
  number  = {3},
  pages   = {1033--1048},
  year    = {2003}
}
% Globus Genomics experience paper. Product names are brace-protected against
% sentence-casing; the journal title and page range are given in full.
@article{madduri14,
  author  = {Madduri, Ravi K and Sulakhe, Dinanath and Lacinski, Lukasz and Liu, Bo and Rodriguez, Alex and Chard, Kyle and others},
  title   = {Experiences Building {Globus Genomics}: A Next-Generation Sequencing Analysis Service using {Galaxy}, {Globus}, and {Amazon Web Services}},
  journal = {Concurrency and Computation: Practice and Experience},
  volume  = {26},
  number  = {13},
  pages   = {2266--2279},
  year    = {2014}
}
% Genome-wide in vivo footprinting of transcription factors.
@article{boyle2011high,
  author    = {Boyle, Alan P and Song, Lingyun and Lee, Bum-Kyu and London, Darin and Keefe, Damian and Birney, Ewan and Iyer, Vishwanath R and Crawford, Gregory E and Furey, Terrence S},
  title     = {High-resolution genome-wide in vivo footprinting of diverse transcription factors in human cells},
  journal   = {Genome research},
  volume    = {21},
  number    = {3},
  pages     = {456--464},
  year      = {2011},
  publisher = {Cold Spring Harbor Lab}
}
% Toil workflow engine paper (Nature Biotechnology).
@article{vivian2017toil,
  author    = {Vivian, John and Rao, Arjun Arkal and Nothaft, Frank Austin and Ketchum, Christopher and Armstrong, Joel and Novak, Adam and Pfeil, Jacob and Narkizian, Jake and Deran, Alden D and Musselman-Brown, Audrey and others},
  title     = {Toil enables reproducible, open source, big biomedical data analyses},
  journal   = {Nature Biotechnology},
  volume    = {35},
  number    = {4},
  pages     = {314--316},
  year      = {2017},
  publisher = {Nature Research}
}
% Kitchin on Big Data and epistemology (Big Data and Society).
@article{kitchin2014big,
  author    = {Kitchin, Rob},
  title     = {Big Data, new epistemologies and paradigm shifts},
  journal   = {Big Data \& Society},
  volume    = {1},
  number    = {1},
  pages     = {2053951714528481},
  year      = {2014},
  publisher = {SAGE Publications Sage UK: London, England}
}
% Survey of data sharing practices among scientists.
@article{tenopir2011data,
  author    = {Tenopir, Carol and Allard, Suzie and Douglass, Kimberly and Aydinoglu, Arsev Umur and Wu, Lei and Read, Eleanor and Manoff, Maribeth and Frame, Mike},
  title     = {Data sharing by scientists: practices and perceptions},
  journal   = {PLOS ONE},
  volume    = {6},
  number    = {6},
  pages     = {e21101},
  year      = {2011},
  publisher = {Public Library of Science}
}
% Collins and Varmus on the precision medicine initiative (NEJM).
@article{collins2015new,
  author    = {Collins, Francis S and Varmus, Harold},
  title     = {A new initiative on precision medicine},
  journal   = {New England Journal of Medicine},
  volume    = {372},
  number    = {9},
  pages     = {793--795},
  year      = {2015},
  publisher = {Mass Medical Soc}
}
% Marx, Nature technology feature on big data in biology.
@article{marx2013biology,
  author    = {Marx, Vivien},
  title     = {Biology: The big challenges of big data},
  journal   = {Nature},
  volume    = {498},
  number    = {7453},
  pages     = {255--260},
  year      = {2013},
  publisher = {Nature Research}
}
% The Fourth Paradigm (edited volume). Same work as hey09; both keys are kept.
% Publisher and city are stored in separate fields.
@book{hey2009fourth,
  editor    = {Hey, Tony and Tansley, Stewart and Tolle, Kristin M},
  title     = {The Fourth Paradigm: Data-Intensive Scientific Discovery},
  publisher = {Microsoft Research},
  address   = {Redmond, WA},
  year      = {2009}
}
% Wellington footprinting method. The journal name is spelt in full, as in the
% other Nucleic Acids Research entries of this file.
@article{piper13,
  author  = {Piper, Jason and Elze, Markus C and Cauchy, Pierre and Cockerill, Peter N and Bonifer, Constanze and Ott, Sascha},
  title   = {Wellington: A novel method for the accurate identification of digital genomic footprints from {DNase-seq} data},
  journal = {Nucleic Acids Research},
  volume  = {41},
  number  = {21},
  pages   = {e201},
  year    = {2013}
}
% Active TFBS detection from DNase hypersensitivity and histone marks.
% DNase is braced against sentence-casing; the page range is written in full.
@article{gusmao14,
  author  = {Gusmao, Eduardo G. and Dieterich, Christoph and Zenke, Martin and Costa, Ivan G.},
  title   = {Detection of active transcription factor binding sites with the combination of {DNase} hypersensitivity and histone modifications},
  journal = {Bioinformatics},
  volume  = {30},
  number  = {22},
  pages   = {3143--3151},
  year    = {2014}
}
% Pioneer transcription factor discovery from DNase profiles. The journal name
% matches the spelling used by the vivian2017toil entry.
@article{sherwood14,
  author  = {Sherwood, Richard I and Hashimoto, Tatsunori and O'Donnell, Charles W and Lewis, Sophia and Barkal, Amira A and van Hoff, John Peter and others},
  title   = {Discovery of directional and nondirectional pioneer transcription factors by modeling {DNase} profile magnitude and shape},
  journal = {Nature Biotechnology},
  volume  = {32},
  number  = {2},
  pages   = {171--178},
  year    = {2014}
}
% Comparison of computational footprinting methods (Nature Methods).
% Authors are listed individually so BibTeX can parse, sort and abbreviate them.
@article{gusmao16,
  author  = {Gusmao, Eduardo G and Allhoff, Manuel and Zenke, Martin and Costa, Ivan G},
  title   = {Analysis of computational footprinting methods for {DNase} sequencing experiments},
  journal = {Nature Methods},
  volume  = {13},
  number  = {4},
  pages   = {303--309},
  year    = {2016}
}
% SAM format and SAMtools paper.
@article{li2009sequence,
  author    = {Li, Heng and Handsaker, Bob and Wysoker, Alec and Fennell, Tim and Ruan, Jue and Homer, Nils and Marth, Gabor and Abecasis, Goncalo and Durbin, Richard},
  title     = {The sequence alignment/map format and {SAM}tools},
  journal   = {Bioinformatics},
  volume    = {25},
  number    = {16},
  pages     = {2078--2079},
  year      = {2009},
  publisher = {Oxford University Press}
}
% SNAP aligner arXiv preprint. The arXiv identifier lives in the eprint fields
% (and the URL) instead of being stored as a page number.
@misc{zaharia11,
  author        = {Zaharia, Matei and Bolosky, William J and Curtis, Kristal and Fox, Armando and Patterson, David and Shenker, Scott and others},
  title         = {Faster and More Accurate Sequence Alignment with {SNAP}},
  year          = {2011},
  eprint        = {1111.5572v1},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1111.5572v1}
}
% Globus transfer and sharing paper (IEEE Cloud Computing); also cited as chard14.
@article{chard2014efficient,
  author    = {Chard, Kyle and Tuecke, Steven and Foster, Ian},
  title     = {Efficient and secure transfer, synchronization, and sharing of big data},
  journal   = {IEEE Cloud Computing},
  volume    = {1},
  number    = {3},
  pages     = {46--55},
  year      = {2014},
  publisher = {IEEE}
}
% FIMO motif scanner paper.
@article{grant2011fimo,
  author    = {Grant, Charles E and Bailey, Timothy L and Noble, William Stafford},
  title     = {{FIMO}: Scanning for occurrences of a given motif},
  journal   = {Bioinformatics},
  volume    = {27},
  number    = {7},
  pages     = {1017--1018},
  year      = {2011},
  publisher = {Oxford University Press}
}
% MotifDb R package. The version string is kept in `note` rather than inside
% the title.
@misc{shannon17,
  author = {Shannon, Paul and Richards, Matt},
  title  = {{MotifDb}: An Annotated Collection of {Protein-DNA} Binding Sequence Motifs},
  note   = {R package version 1.20.0},
  year   = {2017}
}
% F-Seq density estimator. The tool name is braced so sentence-casing styles do
% not turn it into "F-seq"; the publisher matches the other Bioinformatics entries.
@article{boyle08,
  author    = {Boyle, Alan P. and Guinney, Justin and Crawford, Gregory E. and Furey, Terrence S.},
  title     = {{F-Seq}: a feature density estimator for high-throughput sequence tags},
  journal   = {Bioinformatics},
  volume    = {24},
  number    = {21},
  pages     = {2537--2538},
  year      = {2008},
  publisher = {Oxford University Press}
}
% MEME Suite paper (Nucleic Acids Research web server issue).
% NOTE(review): the issue designation suppl_2 follows the form used by the
% pachkov2006swissregulon entry; confirm against the publisher record.
@article{bailey09,
  author    = {Bailey, Timothy L. and Bod{\'e}n, Mikael and Buske, Fabian A. and Frith, Martin and Grant, Charles E. and Clementi, Luca and Ren, Jingyuan and Li, Wilfred W. and Noble, William S.},
  title     = {{MEME Suite}: Tools for motif discovery and searching},
  journal   = {Nucleic Acids Research},
  volume    = {37},
  number    = {suppl\_2},
  pages     = {W202--W208},
  year      = {2009},
  publisher = {Oxford University Press}
}
% GenomicRanges software paper. Accented letters use the brace-wrapped form
% that BibTeX treats as a single character when sorting and building labels.
@article{lawrence13,
  author    = {Lawrence, Michael and Huber, Wolfgang and Pag{\`e}s, Herv{\'e} and Aboyoun, Patrick and Carlson, Marc and Gentleman, Robert and Morgan, Martin T. and Carey, Vincent J.},
  title     = {Software for Computing and Annotating Genomic Ranges},
  journal   = {PLoS Computational Biology},
  volume    = {9},
  number    = {8},
  pages     = {e1003118},
  year      = {2013},
  publisher = {Public Library of Science}
}
% Code repository link: Java tool that automates Dockerfile creation, build and push.
@misc{JavaCode,
  url   = {https://github.com/globusgenomics/GlobusGenomics_Java},
  title = {Java program for the automation of creating {D}ockerfile, building it, and pushing it to the {D}ocker {H}ub}
}
% Web page link: usage instructions for the footprint databases.
@misc{Instructions,
  url   = {http://footprints.bdds.globusgenomics.org},
  title = {How to use the footprint databases}
}
% JSON-LD 1.1 specification draft. Only the acronyms are brace-protected, and
% the umlaut uses the brace-wrapped form BibTeX handles as one character.
@misc{JSONLD,
  author = {Sporny, Manu and Longley, Dave and Kellogg, Gregg and Lanthaler, Markus and Lindstr{\"o}m, Niklas},
  title  = {{JSON-LD} 1.1: A {JSON}-based Serialization for Linked Data},
  year   = {2018},
  url    = {https://json-ld.org/spec/latest/json-ld/}
}
% Paskin on DOIs for scientific data (Data Science Journal).
@article{paskin2005digital,
  author    = {Paskin, Norman},
  title     = {Digital object identifiers for scientific data},
  journal   = {Data Science Journal},
  volume    = {4},
  pages     = {12--20},
  year      = {2005},
  publisher = {CODATA}
}
% Code repository link: generation of the TF binding motif catalog.
@misc{Motifs,
  url   = {https://github.com/globusgenomics/genomics-footprint/tree/master/generate_motif},
  title = {Generate the transcription factor binding motif catalog}
}
% FAIR metrics bioRxiv preprint. The compound surname is braced: without the
% braces BibTeX takes "Bonino da" as the von part because of the lowercase "da".
@article{wilkinson17fairmetrics,
  author    = {Wilkinson, Mark D and Sansone, Susanna-Assunta and Schultes, Erik and Doorn, Peter and {Bonino da Silva Santos}, Luiz Olavo and Dumontier, Michel},
  title     = {A design framework and exemplar metrics for {FAIRness}},
  journal   = {bioRxiv},
  year      = {2017},
  publisher = {Cold Spring Harbor Laboratory},
  doi       = {10.1101/225490},
  url       = {https://www.biorxiv.org/content/early/2017/12/01/225490}
}
% LONI/INI Pipeline infrastructure paper (Frontiers in Neuroinformatics).
% Field names are lower-cased like the rest of the file; the abstract is
% ASCII-only so the file stays safe for 8-bit BibTeX.
% NOTE(review): the name "Moon, Seok Woo" was "Woo Moon, Seok" in the
% publisher export; confirm the family name.
@article{ivo-pipeline2,
  author   = {Dinov, Ivo D. and Petrosyan, Petros and Liu, Zhizhong and Eggert, Paul and Hobel, Sam and Vespa, Paul and Moon, Seok Woo and Van Horn, John D. and Franco, Joseph and Toga, Arthur W.},
  title    = {High-throughput neuroimaging-genetics computational infrastructure},
  journal  = {Frontiers in Neuroinformatics},
  volume   = {8},
  pages    = {41},
  year     = {2014},
  doi      = {10.3389/fninf.2014.00041},
  url      = {https://www.frontiersin.org/article/10.3389/fninf.2014.00041},
  issn     = {1662-5196},
  abstract = {Many contemporary neuroscientific investigations face significant challenges in terms of data management, computational processing, data mining and results interpretation. These four pillars define the core infrastructure necessary to plan, organize, orchestrate, validate and disseminate novel scientific methods, computational resources and translational healthcare findings. Data management includes protocols for data acquisition, archival, query, transfer, retrieval and aggregation. Computational processing involves the necessary software, hardware and networking infrastructure required to handle large amounts of heterogeneous neuroimaging, genetics, clinical and phenotypic data and meta-data. In this manuscript we describe the novel high-throughput neuroimaging-genetics computational infrastructure available at the Institute for Neuroimaging and Informatics (INI) and the Laboratory of Neuro Imaging (LONI) at University of Southern California (USC). INI and LONI include ultra-high-field and standard-field MRI brain scanners along with an imaging-genetics database for storing the complete provenance of the raw and derived data and meta-data. A unique feature of this architecture is the Pipeline environment, which integrates the data management, processing, transfer and visualization. Through its client-server architecture, the Pipeline environment provides a graphical user interface for designing, executing, monitoring validating, and disseminating of complex protocols that utilize diverse suites of software tools and web-services. These pipeline workflows are represented as portable XML objects which transfer the execution instructions and user specifications from the client user machine to remote pipeline servers for distributed computing. Using Alzheimer's and Parkinson's data, we provide several examples of translational applications using this infrastructure.}
}
% Predictive Big Data analytics study of Parkinson's disease (PLOS ONE).
% The title uses an ASCII apostrophe, and the abstract escapes percent signs
% and writes dashes as "--", so the entry is safe for 8-bit BibTeX. `pages`
% holds the article number taken from the DOI, as in the other PLOS ONE
% entries of this file.
@article{ivo-pipeline,
  author    = {Dinov, Ivo D. and Heavner, Ben and Tang, Ming and Glusman, Gustavo and Chard, Kyle and D'Arcy, Mike and Madduri, Ravi and Pa, Judy and Spino, Cathie and Kesselman, Carl and Foster, Ian and Deutsch, Eric W. and Price, Nathan D. and Van Horn, John D. and Ames, Joseph and Clark, Kristi and Hood, Leroy and Hampstead, Benjamin M. and Dauer, William and Toga, Arthur W.},
  title     = {Predictive {Big Data} Analytics: A Study of {Parkinson's} Disease Using Large, Complex, Heterogeneous, Incongruent, Multi-Source and Incomplete Observations},
  journal   = {PLOS ONE},
  volume    = {11},
  number    = {8},
  pages     = {e0157077},
  year      = {2016},
  month     = aug,
  publisher = {Public Library of Science},
  doi       = {10.1371/journal.pone.0157077},
  url       = {https://doi.org/10.1371/journal.pone.0157077},
  abstract  = {Background A unique archive of Big Data on Parkinson's Disease is collected, managed and disseminated by the Parkinson's Progression Markers Initiative (PPMI). The integration of such complex and heterogeneous Big Data from multiple sources offers unparalleled opportunities to study the early stages of prevalent neurodegenerative processes, track their progression and quickly identify the efficacies of alternative treatments. Many previous human and animal studies have examined the relationship of Parkinson's disease (PD) risk to trauma, genetics, environment, co-morbidities, or life style. The defining characteristics of Big Data--large size, incongruency, incompleteness, complexity, multiplicity of scales, and heterogeneity of information-generating sources--all pose challenges to the classical techniques for data management, processing, visualization and interpretation. We propose, implement, test and validate complementary model-based and model-free approaches for PD classification and prediction. To explore PD risk using Big Data methodology, we jointly processed complex PPMI imaging, genetics, clinical and demographic data. Methods and Findings Collective representation of the multi-source data facilitates the aggregation and harmonization of complex data elements. This enables joint modeling of the complete data, leading to the development of Big Data analytics, predictive synthesis, and statistical validation. Using heterogeneous PPMI data, we developed a comprehensive protocol for end-to-end data characterization, manipulation, processing, cleaning, analysis and validation. Specifically, we (i) introduce methods for rebalancing imbalanced cohorts, (ii) utilize a wide spectrum of classification methods to generate consistent and powerful phenotypic predictions, and (iii) generate reproducible machine-learning based classification that enables the reporting of model parameters and diagnostic forecasting based on new data.
We evaluated several complementary model-based predictive approaches, which failed to generate accurate and reliable diagnostic predictions. However, the results of several machine-learning based classification methods indicated significant power to predict Parkinson's disease in the PPMI subjects (consistent accuracy, sensitivity, and specificity exceeding 96\%, confirmed using statistical n-fold cross-validation). Clinical (e.g., Unified Parkinson's Disease Rating Scale (UPDRS) scores), demographic (e.g., age), genetics (e.g., rs34637584, chr12), and derived neuroimaging biomarker (e.g., cerebellum shape index) data all contributed to the predictive analytics and diagnostic forecasting. Conclusions Model-free Big Data machine learning-based classification methods (e.g., adaptive boosting, support vector machines) can outperform model-based techniques in terms of predictive precision and reliability (e.g., forecasting patient diagnosis). We observed that statistical rebalancing of cohort sizes yields better discrimination of group differences, specifically for predictive analytics based on heterogeneous and incomplete PPMI data. UPDRS scores play a critical role in predicting diagnosis, which is expected based on the clinical definition of Parkinson's disease. Even without longitudinal UPDRS data, however, the accuracy of model-free machine learning based classification is over 80\%. The methods, software and protocols developed here are openly shared and can be employed to study other neurodegenerative disorders (e.g., Alzheimer's, Huntington's, amyotrophic lateral sclerosis), as well as for other predictive Big Data analytics applications.}
}
% Binder figshare item. The month uses the standard BibTeX macro, and the
% accented surname is spelt as in the kluyver2016jupyter entry.
@misc{culich17binder,
  author = {Culich, Aaron and Granger, Brian and Head, Tim and Holdgraf, Chris and Panda, Yuvi and P{\'e}rez, Fernando and Ragan-Kelley, Min and Willing, Carol},
  title  = {Binder: Enabling sharing and publication of reproducible computational research},
  year   = {2017},
  month  = dec,
  doi    = {10.6084/m9.figshare.5671840.v1},
  url    = {https://doi.org/10.6084/m9.figshare.5671840.v1}
}
% Whole Tale paper. The quoted project name is brace-protected so that
% sentence-casing styles keep its capitals.
% NOTE(review): volume and pages are not recorded here; add them once
% confirmed against the journal.
@article{brinckman17wholetale,
  author  = {Brinckman, Adam and Chard, Kyle and Gaffney, Niall and Hategan, Mihael and Jones, Matthew B. and Kowalik, Kacper and Kulasekaran, Sivakumar and Lud{\"a}scher, Bertram and Mecum, Bryce D. and Nabrzyski, Jaroslaw and Stodden, Victoria and Taylor, Ian J. and Turk, Matthew J. and Turner, Kandace},
  title   = {Computing Environments for Reproducibility: Capturing the {``Whole Tale''}},
  journal = {Future Generation Computer Systems},
  year    = {2017}
}
% Common Workflow Language v1.0 specification (figshare). Accented letters use
% the brace-wrapped form; the caron in the third author's given name is
% written with the standard LaTeX accent command.
@misc{cwl,
  author      = {Amstutz, Peter and Crusoe, Michael R. and Tijani{\'c}, Neboj{\v{s}}a and Chapman, Brad and Chilton, John and Heuer, Michael and Kartashov, Andrey and Kern, John and Leehr, Dan and M{\'e}nager, Herv{\'e} and Nedeljkovich, Maya and Scales, Matt and Soiland-Reyes, Stian and Stojanovic, Luka},
  title       = {{Common Workflow Language}, v1.0},
  institution = {Common Workflow Language working group},
  publisher   = {Figshare},
  year        = {2016},
  doi         = {10.6084/m9.figshare.3115156.v2},
  url         = {https://doi.org/10.6084/m9.figshare.3115156.v2}
}