-
Notifications
You must be signed in to change notification settings - Fork 2
/
ref.bib
941 lines (829 loc) · 37.4 KB
/
ref.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
@article{li20177,
  author = {Li, Yixing and Liu, Zichuan and Xu, Kai and Yu, Hao and Ren, Fengbo},
  title = {A 7.663-{TOPS} 8.2-{W} Energy-efficient {FPGA} Accelerator for Binary Convolutional Neural Networks},
  journal = {arXiv preprint arXiv:1702.06392},
  year = {2017}
}
@inproceedings{nakahara2017batch,
  author = {Nakahara, Hiroki and Yonekawa, Haruyoshi and Iwamoto, Hisashi and Motomura, Masato},
  title = {A Batch Normalization Free Binarized Convolutional Deep Neural Network on an {FPGA}},
  booktitle = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages = {290},
  year = {2017},
  organization = {ACM}
}
@inproceedings{zhao2017accelerating,
  author = {Zhao, Ritchie and Song, Weinan and Zhang, Wentao and Xing, Tianwei and Lin, Jeng-Hau and Srivastava, Mani B and Gupta, Rajesh and Zhang, Zhiru},
  title = {Accelerating Binarized Convolutional Neural Networks with Software-Programmable {FPGAs}},
  booktitle = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages = {15--24},
  year = {2017},
  organization = {ACM}
}
@article{aydonat2017opencl,
  author = {Aydonat, Utku and O'Connell, Shane and Capalija, Davor and Ling, Andrew C and Chiu, Gordon R},
  title = {An {OpenCL}{\texttrademark} Deep Learning Accelerator on {Arria} 10},
  journal = {arXiv preprint arXiv:1701.03534},
  year = {2017}
}
@inproceedings{han2017ese,
  author = {Han, Song and Kang, Junlong and Mao, Huizi and Hu, Yiming and Li, Xin and Li, Yubin and Xie, Dongliang and Luo, Hong and Yao, Song and Wang, Yu and others},
  title = {{ESE}: Efficient Speech Recognition Engine with Sparse {LSTM} on {FPGA}},
  booktitle = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages = {75--84},
  year = {2017},
  organization = {ACM}
}
@inproceedings{umuroglu2017finn,
  author = {Umuroglu, Yaman and Fraser, Nicholas J and Gambardella, Giulio and Blott, Michaela and Leong, Philip and Jahre, Magnus and Vissers, Kees},
  title = {{FINN}: A framework for fast, scalable binarized neural network inference},
  booktitle = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages = {65--74},
  year = {2017},
  organization = {ACM}
}
@inproceedings{venieris2017fpgaconvnet,
  author = {Venieris, Stylianos I and Bouganis, Christos-Savvas},
  title = {{fpgaConvNet}: Automated Mapping of Convolutional Neural Networks on {FPGAs}},
  booktitle = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages = {291--292},
  year = {2017},
  organization = {ACM}
}
@inproceedings{zhang2017frequency,
  author = {Zhang, Chi and Prasanna, Viktor},
  title = {Frequency domain acceleration of convolutional neural networks on {CPU-FPGA} shared memory system},
  booktitle = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages = {35--44},
  year = {2017},
  organization = {ACM}
}
@inproceedings{zhang2017improving,
  author = {Zhang, Jialiang and Li, Jing},
  title = {Improving the Performance of {OpenCL}-based {FPGA} Accelerator for Convolutional Neural Network},
  booktitle = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages = {25--34},
  year = {2017},
  organization = {ACM}
}
@inproceedings{ma2017optimizing,
  author = {Ma, Yufei and Cao, Yu and Vrudhula, Sarma and Seo, Jae-sun},
  title = {Optimizing Loop Operation and Dataflow in {FPGA} Acceleration of Deep Convolutional Neural Networks},
  booktitle = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages = {45--54},
  year = {2017},
  organization = {ACM}
}
@inproceedings{qiu2016going,
  author = {Qiu, Jiantao and Wang, Jie and Yao, Song and Guo, Kaiyuan and Li, Boxun and Zhou, Erjin and Yu, Jincheng and Tang, Tianqi and Xu, Ningyi and Song, Sen and others},
  title = {Going deeper with embedded {FPGA} platform for convolutional neural network},
  booktitle = {Proceedings of the 2016 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages = {26--35},
  year = {2016},
  organization = {ACM}
}
@inproceedings{suda2016throughput,
  author = {Suda, Naveen and Chandra, Vikas and Dasika, Ganesh and Mohanty, Abinash and Ma, Yufei and Vrudhula, Sarma and Seo, Jae-sun and Cao, Yu},
  title = {Throughput-optimized {OpenCL}-based {FPGA} accelerator for large-scale convolutional neural networks},
  booktitle = {Proceedings of the 2016 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages = {16--25},
  year = {2016},
  organization = {ACM}
}
@inproceedings{zhang2015optimizing,
  author = {Zhang, Chen and Li, Peng and Sun, Guangyu and Guan, Yijin and Xiao, Bingjun and Cong, Jason},
  title = {Optimizing {FPGA}-based accelerator design for deep convolutional neural networks},
  booktitle = {Proceedings of the 2015 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages = {161--170},
  year = {2015},
  organization = {ACM}
}
@inproceedings{guan2017fp,
  author = {Guan, Yijin and Liang, Hao and Xu, Ningyi and Wang, Wenqiang and Shi, Shaoshuai and Chen, Xi and Sun, Guangyu and Zhang, Wei and Cong, Jason},
  title = {{FP-DNN}: An Automated Framework for Mapping Deep Neural Networks onto {FPGAs} with {RTL-HLS} Hybrid Templates},
  booktitle = {Field-Programmable Custom Computing Machines (FCCM), 2017 IEEE 25th Annual International Symposium on},
  pages = {152--159},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{lu2017evaluating,
  author = {Lu, Liqiang and Liang, Yun and Xiao, Qingcheng and Yan, Shengen},
  title = {Evaluating fast algorithms for convolutional neural networks on {FPGAs}},
  booktitle = {Field-Programmable Custom Computing Machines (FCCM), 2017 IEEE 25th Annual International Symposium on},
  pages = {101--108},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{samragh2017customizing,
  author = {Samragh, Mohammad and Ghasemzadeh, Mohammad and Koushanfar, Farinaz},
  title = {Customizing neural networks for efficient {FPGA} implementation},
  booktitle = {Field-Programmable Custom Computing Machines (FCCM), 2017 IEEE 25th Annual International Symposium on},
  pages = {85--92},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{shen2017escher,
  author = {Shen, Yongming and Ferdman, Michael and Milder, Peter},
  title = {Escher: A {CNN} Accelerator with Flexible Buffering to Minimize Off-Chip Transfer},
  booktitle = {Field-Programmable Custom Computing Machines (FCCM), 2017 IEEE 25th Annual International Symposium on},
  pages = {93--100},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{guo2017bit,
  author = {Guo, Jianxin and Yin, Shouyi and Ouyang, Peng and Liu, Leibo and Wei, Shaojun},
  title = {Bit-Width Based Resource Partitioning for {CNN} Acceleration on {FPGA}},
  booktitle = {Field-Programmable Custom Computing Machines (FCCM), 2017 IEEE 25th Annual International Symposium on},
  pages = {31},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{podili2017fast,
  author = {Podili, Abhinav and Zhang, Chi and Prasanna, Viktor},
  title = {Fast and efficient implementation of Convolutional Neural Networks on {FPGA}},
  booktitle = {Application-specific Systems, Architectures and Processors (ASAP), 2017 IEEE 28th International Conference on},
  pages = {11--18},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{colangelo2017fine,
  author = {Colangelo, Philip and Huang, Randy and Luebbers, Enno and Margala, Martin and Nealis, Kevin},
  title = {Fine-Grained Acceleration of Binary Neural Networks Using {Intel}{\textregistered} {Xeon}{\textregistered} Processor with Integrated {FPGA}},
  booktitle = {Field-Programmable Custom Computing Machines (FCCM), 2017 IEEE 25th Annual International Symposium on},
  pages = {135},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{morcel2017minimalist,
  author = {Morcel, Raghid and Akkary, Haitham and Hajj, Hazem and Saghir, Mazen and Keshavamurthy, Anil and Khanna, Rahul and Artail, Hassan},
  title = {Minimalist Design for Accelerating Convolutional Neural Networks for Low-End {FPGA} Platforms},
  booktitle = {Field-Programmable Custom Computing Machines (FCCM), 2017 IEEE 25th Annual International Symposium on},
  pages = {196},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{hegde2016evaluating,
  author = {Hegde, Gopalakrishna and Ramasamy, Nachiappan and Buddha, Vamsi and Kapre, Nachiket and others},
  title = {Evaluating Embedded {FPGA} Accelerators for Deep Learning Applications},
  booktitle = {Field-Programmable Custom Computing Machines (FCCM), 2016 IEEE 24th Annual International Symposium on},
  pages = {25},
  year = {2016},
  organization = {IEEE}
}
@inproceedings{li2015fpga,
  author = {Li, Sicheng and Wu, Chunpeng and Li, Hai and Li, Boxun and Wang, Yu and Qiu, Qinru},
  title = {{FPGA} acceleration of recurrent neural network based language model},
  booktitle = {Field-Programmable Custom Computing Machines (FCCM), 2015 IEEE 23rd Annual International Symposium on},
  pages = {111--118},
  year = {2015},
  organization = {IEEE}
}
@inproceedings{zhou2015fpga,
  author = {Zhou, Yuteng and Wang, Wei and Huang, Xinming},
  title = {{FPGA} design for {PCANet} deep learning network},
  booktitle = {Field-Programmable Custom Computing Machines (FCCM), 2015 IEEE 23rd Annual International Symposium on},
  pages = {232},
  year = {2015},
  organization = {IEEE}
}
@inproceedings{nakahara2017fully,
  author = {Nakahara, Hiroki and Fujii, Tomoya and Sato, Shimpei},
  title = {A fully connected layer elimination for a binarized convolutional neural network on an {FPGA}},
  booktitle = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages = {1--4},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{wu2017high,
  author = {Wu, Ephrem and Zhang, Xiaoqian and Berman, David and Cho, Inkeun},
  title = {A high-throughput reconfigurable processing array for neural networks},
  booktitle = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages = {1--4},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{jiao2017accelerating,
  author = {Jiao, Li and Luo, Cheng and Cao, Wei and Zhou, Xuegong and Wang, Lingli},
  title = {Accelerating low bit-width convolutional neural networks with embedded {FPGA}},
  booktitle = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages = {1--4},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{ma2017automatic,
  author = {Ma, Yufei and Cao, Yu and Vrudhula, Sarma and Seo, Jae-sun},
  title = {An automatic {RTL} compiler for high-throughput {FPGA} implementation of diverse deep convolutional neural networks},
  booktitle = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages = {1--8},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{fan2017f,
  author = {Fan, Hongxiang and Niu, Xinyu and Liu, Qiang and Luk, Wayne},
  title = {{F-C3D}: {FPGA}-based 3-dimensional convolutional neural network},
  booktitle = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages = {1--4},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{moss2017high,
  author = {Moss, Duncan JM and Nurvitadhi, Eriko and Sim, Jaewoong and Mishra, Asit and Marr, Debbie and Subhaschandra, Suchit and Leong, Philip HW},
  title = {High performance binary neural networks on the {Xeon+FPGA}{\texttrademark} platform},
  booktitle = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages = {1--4},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{zhang2017high,
  author = {Zhang, Xiaofan and Liu, Xinheng and Ramachandran, Anand and Zhuge, Chuanhao and Tang, Shibin and Ouyang, Peng and Cheng, Zuofu and Rupnow, Kyle and Chen, Deming},
  title = {High-performance video content recognition with long-term recurrent convolutional network for {FPGA}},
  booktitle = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages = {1--4},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{venieris2017latency,
  author = {Venieris, Stylianos I and Bouganis, Christos-Savvas},
  title = {Latency-driven design for {FPGA}-based convolutional neural networks},
  booktitle = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages = {1--8},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{lu2017leveraging,
  author = {Lu, Weina and Lu, Wenyan and Ye, Jing and Hu, Yu and Li, Xiaowei},
  title = {Leveraging {FVT}-margins in design space exploration for {FPGA}-based {CNN} accelerators},
  booktitle = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages = {1--4},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{vestias2017parallel,
  author = {V{\'e}stias, M{\'a}rio and Duarte, Rui Policarpo and de Sousa, Jos{\'e} T and Neto, Hor{\'a}cio},
  title = {Parallel dot-products for deep learning on {FPGA}},
  booktitle = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages = {1--4},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{prost2017scalable,
  author = {Prost-Boucle, Adrien and Bourge, Alban and P{\'e}trot, Fr{\'e}d{\'e}ric and Alemdar, Hande and Caldwell, Nicholas and Leroy, Vincent},
  title = {Scalable high-performance architecture for convolutional ternary neural networks on {FPGA}},
  booktitle = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages = {1--7},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{li2016high,
  author = {Li, Huimin and Fan, Xitian and Jiao, Li and Cao, Wei and Zhou, Xuegong and Wang, Lingli},
  title = {A high performance {FPGA}-based accelerator for large-scale convolutional neural networks},
  booktitle = {Field Programmable Logic and Applications (FPL), 2016 26th International Conference on},
  pages = {1--9},
  year = {2016},
  organization = {IEEE}
}
@inproceedings{nurvitadhi2016accelerating1,
  author = {Nurvitadhi, Eriko and Sim, Jaewoong and Sheffield, David and Mishra, Asit and Krishnan, Srivatsan and Marr, Debbie},
  title = {Accelerating recurrent neural networks in analytics servers: comparison of {FPGA}, {CPU}, {GPU}, and {ASIC}},
  booktitle = {Field Programmable Logic and Applications (FPL), 2016 26th International Conference on},
  pages = {1--4},
  year = {2016},
  organization = {IEEE}
}
@inproceedings{shen2016overcoming,
  author = {Shen, Yongming and Ferdman, Michael and Milder, Peter},
  title = {Overcoming resource underutilization in spatial {CNN} accelerators},
  booktitle = {Field Programmable Logic and Applications (FPL), 2016 26th International Conference on},
  pages = {1--4},
  year = {2016},
  organization = {IEEE}
}
@inproceedings{nakahara2015deep,
  author = {Nakahara, Hiroki and Sasao, Tsutomu},
  title = {A deep convolutional neural network based on nested residue number system},
  booktitle = {Field Programmable Logic and Applications (FPL), 2015 25th International Conference on},
  pages = {1--6},
  year = {2015},
  organization = {IEEE}
}
@inproceedings{liu2016automatic,
  author = {Liu, Zhiqiang and Dou, Yong and Jiang, Jingfei and Xu, Jinwei},
  title = {Automatic code generation of convolutional neural networks in {FPGA} implementation},
  booktitle = {Field-Programmable Technology (FPT), 2016 International Conference on},
  pages = {61--68},
  year = {2016},
  organization = {IEEE}
}
@inproceedings{nurvitadhi2016accelerating,
  author = {Nurvitadhi, Eriko and Sheffield, David and Sim, Jaewoong and Mishra, Asit and Venkatesh, Ganesh and Marr, Debbie},
  title = {Accelerating Binarized Neural Networks: Comparison of {FPGA}, {CPU}, {GPU}, and {ASIC}},
  booktitle = {Field-Programmable Technology (FPT), 2016 International Conference on},
  pages = {77--84},
  year = {2016},
  organization = {IEEE}
}
@inproceedings{dicecco2016caffeinated,
  author = {DiCecco, Roberto and Lacey, Griffin and Vasiljevic, Jasmina and Chow, Paul and Taylor, Graham and Areibi, Shawki},
  title = {Caffeinated {FPGAs}: {FPGA} Framework For Convolutional Neural Networks},
  booktitle = {Field-Programmable Technology (FPT), 2016 International Conference on},
  pages = {265--268},
  year = {2016},
  organization = {IEEE}
}
@inproceedings{nakahara2016memory,
  author = {Nakahara, Hiroki and Yonekawa, Haruyoshi and Sasao, Tsutomu and Iwamoto, Hisashi and Motomura, Masato},
  title = {A memory-based realization of a binarized deep convolutional neural network},
  booktitle = {Field-Programmable Technology (FPT), 2016 International Conference on},
  pages = {277--280},
  year = {2016},
  organization = {IEEE}
}
@inproceedings{zhang2016energy,
  author = {Zhang, Chen and Wu, Di and Sun, Jiayu and Sun, Guangyu and Luo, Guojie and Cong, Jason},
  title = {Energy-Efficient {CNN} Implementation on a Deeply Pipelined {FPGA} Cluster},
  booktitle = {Proceedings of the 2016 International Symposium on Low Power Electronics and Design},
  pages = {326--331},
  year = {2016},
  organization = {ACM}
}
@inproceedings{wei2017automated,
  author = {Wei, Xuechao and Yu, Cody Hao and Zhang, Peng and Chen, Youxiang and Wang, Yuxin and Hu, Han and Liang, Yun and Cong, Jason},
  title = {Automated Systolic Array Architecture Synthesis for High Throughput {CNN} Inference on {FPGAs}},
  booktitle = {Proceedings of the 54th Annual Design Automation Conference 2017},
  pages = {29},
  year = {2017},
  organization = {ACM}
}
@inproceedings{xiao2017exploring,
  author = {Xiao, Qingcheng and Liang, Yun and Lu, Liqiang and Yan, Shengen and Tai, Yu-Wing},
  title = {Exploring Heterogeneous Algorithms for Accelerating Deep Convolutional Neural Networks on {FPGAs}},
  booktitle = {Proceedings of the 54th Annual Design Automation Conference 2017},
  pages = {62},
  year = {2017},
  organization = {ACM}
}
@inproceedings{wang2016deepburning,
  author = {Wang, Ying and Xu, Jie and Han, Yinhe and Li, Huawei and Li, Xiaowei},
  title = {{DeepBurning}: automatic generation of {FPGA}-based learning accelerators for the neural network family},
  booktitle = {Design Automation Conference (DAC), 2016 53rd ACM/EDAC/IEEE},
  pages = {1--6},
  year = {2016},
  organization = {IEEE}
}
@inproceedings{motamedi2016design,
  author = {Motamedi, Mohammad and Gysel, Philipp and Akella, Venkatesh and Ghiasi, Soheil},
  title = {Design space exploration of {FPGA}-based deep convolutional neural networks},
  booktitle = {Design Automation Conference (ASP-DAC), 2016 21st Asia and South Pacific},
  pages = {575--580},
  year = {2016},
  organization = {IEEE}
}
@inproceedings{guan2017fpga,
  author = {Guan, Yijin and Yuan, Zhihang and Sun, Guangyu and Cong, Jason},
  title = {{FPGA}-based accelerator for long short-term memory recurrent neural networks},
  booktitle = {Design Automation Conference (ASP-DAC), 2017 22nd Asia and South Pacific},
  pages = {629--634},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{gokhale2014240,
  author = {Gokhale, Vinayak and Jin, Jonghoon and Dundar, Aysegul and Martini, Berin and Culurciello, Eugenio},
  title = {A 240 {G-ops/s} mobile coprocessor for deep neural networks},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops},
  pages = {682--687},
  year = {2014}
}
@inproceedings{ding2017c,
  author = {Ding, Caiwen and Liao, Siyu and Wang, Yanzhi and Li, Zhe and Liu, Ning and Zhuo, Youwei and Wang, Chao and Qian, Xuehai and Bai, Yu and Yuan, Geng and others},
  title = {{CirCNN}: accelerating and compressing deep neural networks using block-circulant weight matrices},
  booktitle = {Proceedings of the 50th Annual IEEE/ACM International Symposium on Microarchitecture},
  pages = {395--408},
  year = {2017},
  organization = {ACM}
}
@inproceedings{sharma2016high,
  author = {Sharma, Hardik and Park, Jongse and Mahajan, Divya and Amaro, Emmanuel and Kim, Joon Kyung and Shao, Chenkai and Mishra, Asit and Esmaeilzadeh, Hadi},
  title = {From high-level deep neural models to {FPGAs}},
  booktitle = {Microarchitecture (MICRO), 2016 49th Annual IEEE/ACM International Symposium on},
  pages = {1--12},
  year = {2016},
  organization = {IEEE}
}
@inproceedings{alwani2016fused,
  author = {Alwani, Manoj and Chen, Han and Ferdman, Michael and Milder, Peter},
  title = {Fused-layer {CNN} accelerators},
  booktitle = {Microarchitecture (MICRO), 2016 49th Annual IEEE/ACM International Symposium on},
  pages = {1--12},
  year = {2016},
  organization = {IEEE}
}
@inproceedings{nguyen2017double,
  author = {Nguyen, Dong and Kim, Daewoo and Lee, Jongeun},
  title = {Double {MAC}: Doubling the performance of convolutional neural networks on modern {FPGAs}},
  booktitle = {2017 Design, Automation \& Test in Europe Conference \& Exhibition (DATE)},
  pages = {890--893},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{rahman2017design,
  author = {Rahman, Atul and Oh, Sangyun and Lee, Jongeun and Choi, Kiyoung},
  title = {Design space exploration of {FPGA} accelerators for convolutional neural networks},
  booktitle = {2017 Design, Automation \& Test in Europe Conference \& Exhibition (DATE)},
  pages = {1147--1152},
  year = {2017},
  organization = {IEEE}
}
@inproceedings{shreejith2016accelerated,
  author = {Shreejith, Shanker and Anshuman, Bezborah and Fahmy, Suhaib A},
  title = {Accelerated artificial neural networks on {FPGA} for fault detection in automotive systems},
  booktitle = {Design, Automation \& Test in Europe Conference \& Exhibition (DATE), 2016},
  pages = {37--42},
  year = {2016},
  organization = {IEEE}
}
@inproceedings{rahman2016efficient,
  author = {Rahman, Atul and Lee, Jongeun and Choi, Kiyoung},
  title = {Efficient {FPGA} acceleration of convolutional neural networks using logical-{3D} compute array},
  booktitle = {Design, Automation \& Test in Europe Conference \& Exhibition (DATE), 2016},
  pages = {1393--1398},
  year = {2016},
  organization = {IEEE}
}
@inproceedings{zhang2016caffeine,
  author = {Zhang, Chen and Fang, Zhenman and Zhou, Peipei and Pan, Peichen and Cong, Jason},
  title = {Caffeine: Towards uniformed representation and acceleration for deep convolutional neural networks},
  booktitle = {Computer-Aided Design (ICCAD), 2016 IEEE/ACM International Conference on},
  pages = {1--8},
  year = {2016},
  organization = {IEEE}
}
@inproceedings{krizhevsky2012imagenet,
  author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
  title = {{ImageNet} classification with deep convolutional neural networks},
  booktitle = {Advances in neural information processing systems},
  pages = {1097--1105},
  year = {2012}
}
@article{ILSVRC15,
  author = {Russakovsky, Olga and Deng, Jia and Su, Hao and Krause, Jonathan and Satheesh, Sanjeev and Ma, Sean and Huang, Zhiheng and Karpathy, Andrej and Khosla, Aditya and Bernstein, Michael and Berg, Alexander C. and Fei-Fei, Li},
  title = {{ImageNet} Large Scale Visual Recognition Challenge},
  journal = {International Journal of Computer Vision},
  volume = {115},
  number = {3},
  pages = {211--252},
  year = {2015},
  doi = {10.1007/s11263-015-0816-y}
}
@inproceedings{girshick2014rich,
  author = {Girshick, Ross and Donahue, Jeff and Darrell, Trevor and Malik, Jitendra},
  title = {Rich feature hierarchies for accurate object detection and semantic segmentation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages = {580--587},
  year = {2014}
}
@article{hannun2014deep,
  author = {Hannun, Awni and Case, Carl and Casper, Jared and Catanzaro, Bryan and Diamos, Greg and Elsen, Erich and Prenger, Ryan and Satheesh, Sanjeev and Sengupta, Shubho and Coates, Adam and others},
  title = {Deep speech: Scaling up end-to-end speech recognition},
  journal = {arXiv preprint arXiv:1412.5567},
  year = {2014}
}
@article{simonyan2014very,
  author = {Simonyan, Karen and Zisserman, Andrew},
  title = {Very deep convolutional networks for large-scale image recognition},
  journal = {arXiv preprint arXiv:1409.1556},
  year = {2014}
}
@article{jia2014caffe,
  author = {Jia, Yangqing and Shelhamer, Evan and Donahue, Jeff and Karayev, Sergey and Long, Jonathan and Girshick, Ross and Guadarrama, Sergio and Darrell, Trevor},
  title = {Caffe: Convolutional Architecture for Fast Feature Embedding},
  journal = {arXiv preprint arXiv:1408.5093},
  year = {2014}
}
@article{abadi2016tensorflow,
  author = {Abadi, Mart{\'\i}n and Agarwal, Ashish and Barham, Paul and Brevdo, Eugene and Chen, Zhifeng and Citro, Craig and Corrado, Greg S and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and others},
  title = {{TensorFlow}: Large-scale machine learning on heterogeneous distributed systems},
  journal = {arXiv preprint arXiv:1603.04467},
  year = {2016}
}
@article{xu2015empirical,
  author = {Xu, Bing and Wang, Naiyan and Chen, Tianqi and Li, Mu},
  title = {Empirical evaluation of rectified activations in convolutional network},
  journal = {arXiv preprint arXiv:1505.00853},
  year = {2015}
}
@inproceedings{he2016deep,
  author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages = {770--778},
  year = {2016}
}
@article{iandola2016squeezenet,
  author = {Iandola, Forrest N and Han, Song and Moskewicz, Matthew W and Ashraf, Khalid and Dally, William J and Keutzer, Kurt},
  title = {{SqueezeNet}: {AlexNet}-level accuracy with 50x fewer parameters and {$<$0.5MB} model size},
  journal = {arXiv preprint arXiv:1602.07360},
  year = {2016}
}
@article{guo2017angel,
  author = {Guo, Kaiyuan and Sui, Lingzhi and Qiu, Jiantao and Yu, Jincheng and Wang, Junbin and Yao, Song and Han, Song and Wang, Yu and Yang, Huazhong},
  title = {{Angel-Eye}: A Complete Design Flow for Mapping {CNN} onto Embedded {FPGA}},
  journal = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  year = {2017},
  publisher = {IEEE}
}
@article{han2015deep,
  author = {Han, Song and Mao, Huizi and Dally, William J},
  title = {Deep compression: Compressing deep neural networks with pruning, trained quantization and {Huffman} coding},
  journal = {arXiv preprint arXiv:1510.00149},
  year = {2015}
}
@inproceedings{chen2015compressing,
  author = {Chen, Wenlin and Wilson, James and Tyree, Stephen and Weinberger, Kilian and Chen, Yixin},
  title = {Compressing neural networks with the hashing trick},
  booktitle = {International Conference on Machine Learning},
  pages = {2285--2294},
  year = {2015}
}
@article{zhu2016trained,
  author = {Zhu, Chenzhuo and Han, Song and Mao, Huizi and Dally, William J},
  title = {Trained ternary quantization},
  journal = {arXiv preprint arXiv:1612.01064},
  year = {2016}
}
@article{li2016ternary,
  author = {Li, Fengfu and Zhang, Bo and Liu, Bin},
  title = {Ternary weight networks},
  journal = {arXiv preprint arXiv:1605.04711},
  year = {2016}
}
@article{zhou2016dorefa,
  author = {Zhou, Shuchang and Wu, Yuxin and Ni, Zekun and Zhou, Xinyu and Wen, He and Zou, Yuheng},
  title = {{DoReFa-Net}: Training low bitwidth convolutional neural networks with low bitwidth gradients},
  journal = {arXiv preprint arXiv:1606.06160},
  year = {2016}
}
@inproceedings{zhang2015efficient,
  author = {Zhang, Xiangyu and Zou, Jianhua and Ming, Xiang and He, Kaiming and Sun, Jian},
  title = {Efficient and accurate approximations of nonlinear convolutional networks},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages = {1984--1992},
  year = {2015}
}
@inproceedings{liu2015sparse,
  author = {Liu, Baoyuan and Wang, Min and Foroosh, Hassan and Tappen, Marshall and Pensky, Marianna},
  title = {Sparse convolutional neural networks},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages = {806--814},
  year = {2015}
}
@book{winograd1980arithmetic,
  author = {Winograd, Shmuel},
  title = {Arithmetic complexity of computations},
  series = {CBMS-NSF Regional Conference Series in Applied Mathematics},
  volume = {33},
  year = {1980},
  publisher = {SIAM}
}
@misc{altera_dsp,
  author = {{Altera}},
  title = {{Stratix} 10 Features},
  howpublished = {\url{https://www.altera.com/products/fpga/stratix-series/stratix-10/features.html}},
  note = {Accessed Dec 7, 2017}
}
@misc{xilinx_dsp,
  author = {{Xilinx}},
  title = {{UltraScale} Architecture {DSP} Slice User Guide ({UG579})},
  howpublished = {\url{https://www.xilinx.com/support/documentation/user_guides/ug579-ultrascale-dsp.pdf}},
  note = {Accessed Dec 7, 2017}
}
@misc{vlsi_energy,
  author = {Horowitz, Mark},
  title = {Energy table for 45nm process},
  howpublished = {\url{https://sites.google.com/site/seecproject}},
  note = {Stanford VLSI wiki}
}
@inproceedings{szegedy2015going,
  author = {Szegedy, Christian and Liu, Wei and Jia, Yangqing and Sermanet, Pierre and Reed, Scott and Anguelov, Dragomir and Erhan, Dumitru and Vanhoucke, Vincent and Rabinovich, Andrew and others},
  title = {Going deeper with convolutions},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages = {1--9},
  year = {2015}
}
@inproceedings{gupta2016accelerating,
  author = {Gupta, P. K.},
  title = {Accelerating datacenter workloads},
  booktitle = {26th International Conference on Field Programmable Logic and Applications (FPL)},
  year = {2016}
}
@article{Howard2017MobileNets,
  author = {Howard, Andrew G and Zhu, Menglong and Chen, Bo and Kalenichenko, Dmitry and Wang, Weijun and Weyand, Tobias and Andreetto, Marco and Adam, Hartwig},
  title = {{MobileNets}: Efficient Convolutional Neural Networks for Mobile Vision Applications},
  journal = {arXiv preprint arXiv:1704.04861},
  year = {2017}
}
@inproceedings{Moss2018A,
  author = {Moss, Duncan J. M. and Leong, Philip H. W. and Krishnan, Srivatsan and Nurvitadhi, Eriko and Ratuszniak, Piotr and Johnson, Chris and Sim, Jaewoong and Mishra, Asit and Marr, Debbie and Subhaschandra, Suchit},
  title = {A Customizable Matrix Multiplication Framework for the {Intel} {HARPv2} {Xeon+FPGA} Platform: A Deep Learning Case Study},
  booktitle = {Proceedings of the 2018 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages = {107--116},
  year = {2018}
}
@inproceedings{Shen2018Towards,
  author    = {Shen, Junzhong and Huang, You and Wang, Zelong and Qiao, Yuran and Wen, Mei and Zhang, Chunyuan},
  title     = {Towards a Uniform Template-based Architecture for Accelerating {2D} and {3D} {CNNs} on {FPGA}},
  booktitle = {Proceedings of the 2018 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages     = {97--106},
  year      = {2018}
}
@inproceedings{Yu2017Instruction,
  author    = {Yu, Jincheng and Hu, Yiming and Ning, Xuefei and Qiu, Jiantao and Guo, Kaiyuan and Wang, Yu and Yang, Huazhong},
  title     = {Instruction driven cross-layer {CNN} accelerator with {Winograd} transformation on {FPGA}},
  booktitle = {International Conference on Field Programmable Technology},
  pages     = {227--230},
  year      = {2017}
}
@inproceedings{mao2017exploring,
  author    = {Mao, Huizi and Han, Song and Pool, Jeff and Li, Wenshuo and Liu, Xingyu and Wang, Yu and Dally, William J.},
  title     = {Exploring the Granularity of Sparsity in Convolutional Neural Networks},
  booktitle = {Computer Vision and Pattern Recognition Workshops},
  pages     = {1927--1934},
  year      = {2017}
}
% NOTE(review): title inferred from the repository name in the URL; confirm.
% The key field only gives BibTeX something to sort and label on.
@misc{chai_dnn,
  key          = {Xilinx},
  title        = {{CHaiDNN}},
  howpublished = {\url{https://github.com/Xilinx/chaidnn}},
  note         = {Accessed August 23, 2018}
}
@inproceedings{liu2016ssd,
  author    = {Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and Berg, Alexander C.},
  title     = {{SSD}: Single shot {MultiBox} detector},
  booktitle = {European Conference on Computer Vision},
  pages     = {21--37},
  year      = {2016},
  publisher = {Springer}
}
@inproceedings{yang2018fully,
  author    = {Yang, Li and He, Zhezhi and Fan, Deliang},
  title     = {A Fully Onchip Binarized Convolutional Neural Network {FPGA} Implementation with Accurate Inference},
  booktitle = {Proceedings of the International Symposium on Low Power Electronics and Design},
  pages     = {50},
  year      = {2018},
  publisher = {ACM}
}
@inproceedings{lin2018lcp,
  author    = {Lin, Xinhan and Yin, Shouyi and Tu, Fengbin and Liu, Leibo and Li, Xiangyu and Wei, Shaojun},
  title     = {{LCP}: a layer clusters paralleling mapping method for accelerating inception and residual networks on {FPGA}},
  booktitle = {Proceedings of the 55th Annual Design Automation Conference},
  pages     = {16},
  year      = {2018},
  publisher = {ACM}
}
% NOTE(review): venue and year filled in from the published version; page numbers still to be added.
@inproceedings{ghasemzadehrebnet,
  author    = {Ghasemzadeh, Mohammad and Samragh, Mohammad and Koushanfar, Farinaz},
  title     = {{ReBNet}: Residual Binarized Neural Network},
  booktitle = {2018 IEEE 26th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)},
  year      = {2018},
  publisher = {IEEE}
}
@inproceedings{RN169,
  author    = {Putnam, Andrew},
  title     = {Large-scale reconfigurable computing in a {Microsoft} datacenter},
  booktitle = {2014 IEEE Hot Chips 26 Symposium (HCS)},
  pages     = {1--38},
  year      = {2014},
  publisher = {IEEE},
  isbn      = {1467388831}
}
@article{Shelhamer2017Fully,
  author  = {Shelhamer, Evan and Long, Jonathan and Darrell, Trevor},
  title   = {Fully Convolutional Networks for Semantic Segmentation},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {39},
  number  = {4},
  pages   = {640--651},
  year    = {2017}
}
@inproceedings{han2016eie,
  author    = {Han, Song and Liu, Xingyu and Mao, Huizi and Pu, Jing and Pedram, Ardavan and Horowitz, Mark A. and Dally, William J.},
  title     = {{EIE}: efficient inference engine on compressed deep neural network},
  booktitle = {Proceedings of the 43rd International Symposium on Computer Architecture},
  pages     = {243--254},
  year      = {2016},
  publisher = {IEEE Press}
}
@inproceedings{tang2017train,
  author    = {Tang, Wei and Hua, Gang and Wang, Liang},
  title     = {How to train a compact binary neural network with high accuracy?},
  booktitle = {AAAI},
  pages     = {2625--2631},
  year      = 2017
}
@article{hubara2017quantized,
  author  = {Hubara, Itay and Courbariaux, Matthieu and Soudry, Daniel and El-Yaniv, Ran and Bengio, Yoshua},
  title   = {Quantized Neural Networks: Training Neural Networks with Low Precision Weights and Activations},
  journal = {Journal of Machine Learning Research},
  volume  = {18},
  number  = {187},
  pages   = {1--30},
  year    = {2017}
}
@inproceedings{hubara2016binarized,
  author    = {Hubara, Itay and Courbariaux, Matthieu and Soudry, Daniel and El-Yaniv, Ran and Bengio, Yoshua},
  title     = {Binarized neural networks},
  booktitle = {Advances in neural information processing systems},
  pages     = {4107--4115},
  year      = 2016
}
@inproceedings{han2015learning,
  author    = {Han, Song and Pool, Jeff and Tran, John and Dally, William},
  title     = {Learning both Weights and Connections for Efficient Neural Network},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1135--1143},
  year      = 2015
}
@inproceedings{zhou2016less,
  author    = {Zhou, Hao and Alvarez, Jose M. and Porikli, Fatih},
  title     = {Less is more: Towards compact {CNNs}},
  booktitle = {European Conference on Computer Vision},
  pages     = {662--677},
  year      = {2016},
  publisher = {Springer}
}
@inproceedings{lebedev2016fast,
  author    = {Lebedev, Vadim and Lempitsky, Victor},
  title     = {Fast {ConvNets} using group-wise brain damage},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages     = {2554--2564},
  year      = {2016}
}
@inproceedings{wen2016learning,
  author    = {Wen, Wei and Wu, Chunpeng and Wang, Yandan and Chen, Yiran and Li, Hai},
  title     = {Learning structured sparsity in deep neural networks},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2074--2082},
  year      = 2016
}
@article{molchanov2016pruning,
  author  = {Molchanov, Pavlo and Tyree, Stephen and Karras, Tero and Aila, Timo and Kautz, Jan},
  title   = {Pruning convolutional neural networks for resource efficient transfer learning},
  journal = {arXiv preprint arXiv:1611.06440},
  year    = 2016
}
@article{li2016pruning,
  author  = {Li, Hao and Kadav, Asim and Durdanovic, Igor and Samet, Hanan and Graf, Hans Peter},
  title   = {Pruning filters for efficient {ConvNets}},
  journal = {arXiv preprint arXiv:1608.08710},
  year    = {2016}
}
@inproceedings{du2015shidiannao,
  author    = {Du, Zidong and Fasthuber, Robert and Chen, Tianshi and others},
  title     = {{ShiDianNao}: shifting vision processing closer to the sensor},
  booktitle = {ISCA},
  pages     = {92--104},
  year      = {2015},
  publisher = {ACM}
}
@inproceedings{chen2016eyeriss,
  author       = {Chen, Yu-Hsin and Krishna, Tushar and Emer, Joel and Sze, Vivienne},
  title        = {Eyeriss: An Energy-Efficient Reconfigurable Accelerator for Deep Convolutional Neural Networks},
  booktitle    = {ISSCC},
  year         = 2016,
  organization = {IEEE}
}
@inproceedings{albericio2016cnvlutin,
  author    = {Albericio, Jorge and Judd, Patrick and Hetherington, Tayler and Aamodt, Tor and Enright Jerger, Natalie and Moshovos, Andreas},
  title     = {Cnvlutin: ineffectual-neuron-free deep neural network computing},
  booktitle = {2016 ACM/IEEE 43rd Annual International Symposium on Computer Architecture (ISCA)},
  pages     = {1--13},
  year      = {2016},
  publisher = {IEEE}
}
@comment{Duplicate definition of the citation key chen2016eyeriss removed here. An entry with the same key and identical field values is already defined earlier in this file, and a repeated key makes BibTeX report a repeated-entry error.}
@inproceedings{zhang2016cambricon,
  author    = {Zhang, Shijin and Du, Zidong and Zhang, Lei and Lan, Huiying and Liu, Shaoli and Li, Ling and Guo, Qi and Chen, Tianshi and Chen, Yunji},
  title     = {{Cambricon-X}: An accelerator for sparse neural networks},
  booktitle = {The 49th Annual IEEE/ACM International Symposium on Microarchitecture},
  pages     = {20},
  year      = {2016},
  publisher = {IEEE Press}
}
@inproceedings{zhao2016f,
  author    = {Zhao, Wenlai and Fu, Haohuan and Luk, Wayne and Yu, Teng and Wang, Shaojun and Feng, Bo and Ma, Yuchun and Yang, Guangwen},
  title     = {{F-CNN}: An {FPGA}-based framework for training convolutional neural networks},
  booktitle = {2016 IEEE 27th International Conference on Application-specific Systems, Architectures and Processors (ASAP)},
  pages     = {107--114},
  year      = {2016},
  publisher = {IEEE}
}
@inproceedings{liu2017fpga,
  author    = {Liu, Zhiqiang and Dou, Yong and Jiang, Jingfei and Wang, Qiang and Chow, Paul},
  title     = {An {FPGA}-based processor for training convolutional neural networks},
  booktitle = {2017 International Conference on Field Programmable Technology (ICFPT)},
  pages     = {207--210},
  year      = {2017},
  publisher = {IEEE}
}
@inproceedings{geng2018fpdeep,
  author    = {Geng, Tong and Wang, Tianqi and Sanaullah, Ahmed and Yang, Chen and Xu, Rui and Patel, Rushi and Herbordt, Martin},
  title     = {{FPDeep}: Acceleration and Load Balancing of {CNN} Training on {FPGA} Clusters},
  booktitle = {2018 IEEE 26th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)},
  pages     = {81--84},
  year      = {2018},
  publisher = {IEEE}
}
@inproceedings{bulucc2009parallel,
  author    = {Bulu{\c{c}}, Ayd{\i}n and Fineman, Jeremy T. and Frigo, Matteo and Gilbert, John R. and Leiserson, Charles E.},
  title     = {Parallel sparse matrix-vector and matrix-transpose-vector multiplication using compressed sparse blocks},
  booktitle = {Proceedings of the twenty-first annual symposium on Parallelism in algorithms and architectures},
  pages     = {233--244},
  year      = {2009},
  publisher = {ACM}
}