ref.bib

@article{li20177,
  author  = {Li, Yixing and Liu, Zichuan and Xu, Kai and Yu, Hao and Ren, Fengbo},
  title   = {A {7.663-TOPS} {8.2-W} Energy-efficient {FPGA} Accelerator for Binary Convolutional Neural Networks},
  journal = {arXiv preprint arXiv:1702.06392},
  year    = {2017}
}

@inproceedings{nakahara2017batch,
  author       = {Nakahara, Hiroki and Yonekawa, Haruyoshi and Iwamoto, Hisashi and Motomura, Masato},
  title        = {A Batch Normalization Free Binarized Convolutional Deep Neural Network on an {FPGA}},
  booktitle    = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages        = {290--290},
  year         = {2017},
  organization = {ACM}
}

@inproceedings{zhao2017accelerating,
  author       = {Zhao, Ritchie and Song, Weinan and Zhang, Wentao and Xing, Tianwei and Lin, Jeng-Hau and Srivastava, Mani B and Gupta, Rajesh and Zhang, Zhiru},
  title        = {Accelerating Binarized Convolutional Neural Networks with Software-Programmable {FPGAs}},
  booktitle    = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages        = {15--24},
  year         = {2017},
  organization = {ACM}
}

@article{aydonat2017opencl,
  author  = {Aydonat, Utku and O'Connell, Shane and Capalija, Davor and Ling, Andrew C and Chiu, Gordon R},
  title   = {An {OpenCL}{\texttrademark} Deep Learning Accelerator on {Arria} 10},
  journal = {arXiv preprint arXiv:1701.03534},
  year    = {2017}
}

@inproceedings{han2017ese,
  author       = {Han, Song and Kang, Junlong and Mao, Huizi and Hu, Yiming and Li, Xin and Li, Yubin and Xie, Dongliang and Luo, Hong and Yao, Song and Wang, Yu and others},
  title        = {{ESE}: Efficient Speech Recognition Engine with Sparse {LSTM} on {FPGA}},
  booktitle    = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages        = {75--84},
  year         = {2017},
  organization = {ACM}
}

@inproceedings{umuroglu2017finn,
  author       = {Umuroglu, Yaman and Fraser, Nicholas J and Gambardella, Giulio and Blott, Michaela and Leong, Philip and Jahre, Magnus and Vissers, Kees},
  title        = {{FINN}: A framework for fast, scalable binarized neural network inference},
  booktitle    = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages        = {65--74},
  year         = {2017},
  organization = {ACM}
}

@inproceedings{venieris2017fpgaconvnet,
  author       = {Venieris, Stylianos I and Bouganis, Christos-Savvas},
  title        = {{fpgaConvNet}: Automated Mapping of Convolutional Neural Networks on {FPGAs}},
  booktitle    = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages        = {291--292},
  year         = {2017},
  organization = {ACM}
}

@inproceedings{zhang2017frequency,
  author       = {Zhang, Chi and Prasanna, Viktor},
  title        = {Frequency domain acceleration of convolutional neural networks on {CPU}-{FPGA} shared memory system},
  booktitle    = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages        = {35--44},
  year         = {2017},
  organization = {ACM}
}

@inproceedings{zhang2017improving,
  author       = {Zhang, Jialiang and Li, Jing},
  title        = {Improving the Performance of {OpenCL}-based {FPGA} Accelerator for Convolutional Neural Network},
  booktitle    = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages        = {25--34},
  year         = {2017},
  organization = {ACM}
}

@inproceedings{ma2017optimizing,
  author       = {Ma, Yufei and Cao, Yu and Vrudhula, Sarma and Seo, Jae-sun},
  title        = {Optimizing Loop Operation and Dataflow in {FPGA} Acceleration of Deep Convolutional Neural Networks},
  booktitle    = {Proceedings of the 2017 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages        = {45--54},
  year         = {2017},
  organization = {ACM}
}

@inproceedings{qiu2016going,
  author       = {Qiu, Jiantao and Wang, Jie and Yao, Song and Guo, Kaiyuan and Li, Boxun and Zhou, Erjin and Yu, Jincheng and Tang, Tianqi and Xu, Ningyi and Song, Sen and others},
  title        = {Going deeper with embedded {FPGA} platform for convolutional neural network},
  booktitle    = {Proceedings of the 2016 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages        = {26--35},
  year         = {2016},
  organization = {ACM}
}

@inproceedings{suda2016throughput,
  author       = {Suda, Naveen and Chandra, Vikas and Dasika, Ganesh and Mohanty, Abinash and Ma, Yufei and Vrudhula, Sarma and Seo, Jae-sun and Cao, Yu},
  title        = {Throughput-optimized {OpenCL}-based {FPGA} accelerator for large-scale convolutional neural networks},
  booktitle    = {Proceedings of the 2016 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages        = {16--25},
  year         = {2016},
  organization = {ACM}
}

@inproceedings{zhang2015optimizing,
  author       = {Zhang, Chen and Li, Peng and Sun, Guangyu and Guan, Yijin and Xiao, Bingjun and Cong, Jason},
  title        = {Optimizing {FPGA}-based accelerator design for deep convolutional neural networks},
  booktitle    = {Proceedings of the 2015 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages        = {161--170},
  year         = {2015},
  organization = {ACM}
}

@inproceedings{guan2017fp,
  author       = {Guan, Yijin and Liang, Hao and Xu, Ningyi and Wang, Wenqiang and Shi, Shaoshuai and Chen, Xi and Sun, Guangyu and Zhang, Wei and Cong, Jason},
  title        = {{FP-DNN}: An Automated Framework for Mapping Deep Neural Networks onto {FPGAs} with {RTL}-{HLS} Hybrid Templates},
  booktitle    = {Field-Programmable Custom Computing Machines (FCCM), 2017 IEEE 25th Annual International Symposium on},
  pages        = {152--159},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{lu2017evaluating,
  author       = {Lu, Liqiang and Liang, Yun and Xiao, Qingcheng and Yan, Shengen},
  title        = {Evaluating fast algorithms for convolutional neural networks on {FPGAs}},
  booktitle    = {Field-Programmable Custom Computing Machines (FCCM), 2017 IEEE 25th Annual International Symposium on},
  pages        = {101--108},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{samragh2017customizing,
  author       = {Samragh, Mohammad and Ghasemzadeh, Mohammad and Koushanfar, Farinaz},
  title        = {Customizing neural networks for efficient {FPGA} implementation},
  booktitle    = {Field-Programmable Custom Computing Machines (FCCM), 2017 IEEE 25th Annual International Symposium on},
  pages        = {85--92},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{shen2017escher,
  author    = {Shen, Yongming and Ferdman, Michael and Milder, Peter},
  title     = {Escher: A {CNN} Accelerator with Flexible Buffering to Minimize Off-Chip Transfer},
  booktitle = {Proceedings of the 25th IEEE International Symposium on Field-Programmable Custom Computing Machines (FCCM'17)},
  year      = {2017},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA}
}

@inproceedings{guo2017bit,
  author       = {Guo, Jianxin and Yin, Shouyi and Ouyang, Peng and Liu, Leibo and Wei, Shaojun},
  title        = {Bit-Width Based Resource Partitioning for {CNN} Acceleration on {FPGA}},
  booktitle    = {Field-Programmable Custom Computing Machines (FCCM), 2017 IEEE 25th Annual International Symposium on},
  pages        = {31--31},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{podili2017fast,
  author       = {Podili, Abhinav and Zhang, Chi and Prasanna, Viktor},
  title        = {Fast and efficient implementation of Convolutional Neural Networks on {FPGA}},
  booktitle    = {Application-specific Systems, Architectures and Processors (ASAP), 2017 IEEE 28th International Conference on},
  pages        = {11--18},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{colangelo2017fine,
  author       = {Colangelo, Philip and Huang, Randy and Luebbers, Enno and Margala, Martin and Nealis, Kevin},
  title        = {Fine-Grained Acceleration of Binary Neural Networks Using {Intel}{\textregistered} {Xeon}{\textregistered} Processor with Integrated {FPGA}},
  booktitle    = {Field-Programmable Custom Computing Machines (FCCM), 2017 IEEE 25th Annual International Symposium on},
  pages        = {135--135},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{morcel2017minimalist,
  author       = {Morcel, Raghid and Akkary, Haitham and Hajj, Hazem and Saghir, Mazen and Keshavamurthy, Anil and Khanna, Rahul and Artail, Hassan},
  title        = {Minimalist Design for Accelerating Convolutional Neural Networks for Low-End {FPGA} Platforms},
  booktitle    = {Field-Programmable Custom Computing Machines (FCCM), 2017 IEEE 25th Annual International Symposium on},
  pages        = {196--196},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{hegde2016evaluating,
  author       = {Hegde, Gopalakrishna and Ramasamy, Nachiappan and Buddha, Vamsi and Kapre, Nachiket and others},
  title        = {Evaluating Embedded {FPGA} Accelerators for Deep Learning Applications},
  booktitle    = {Field-Programmable Custom Computing Machines (FCCM), 2016 IEEE 24th Annual International Symposium on},
  pages        = {25--25},
  year         = {2016},
  organization = {IEEE}
}

@inproceedings{li2015fpga,
  author       = {Li, Sicheng and Wu, Chunpeng and Li, Hai and Li, Boxun and Wang, Yu and Qiu, Qinru},
  title        = {{FPGA} acceleration of recurrent neural network based language model},
  booktitle    = {Field-Programmable Custom Computing Machines (FCCM), 2015 IEEE 23rd Annual International Symposium on},
  pages        = {111--118},
  year         = {2015},
  organization = {IEEE}
}

@inproceedings{zhou2015fpga,
  author       = {Zhou, Yuteng and Wang, Wei and Huang, Xinming},
  title        = {{FPGA} design for {PCANet} deep learning network},
  booktitle    = {Field-Programmable Custom Computing Machines (FCCM), 2015 IEEE 23rd Annual International Symposium on},
  pages        = {232--232},
  year         = {2015},
  organization = {IEEE}
}

@inproceedings{nakahara2017fully,
  author       = {Nakahara, Hiroki and Fujii, Tomoya and Sato, Shimpei},
  title        = {A fully connected layer elimination for a binarized convolutional neural network on an {FPGA}},
  booktitle    = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages        = {1--4},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{wu2017high,
  author       = {Wu, Ephrem and Zhang, Xiaoqian and Berman, David and Cho, Inkeun},
  title        = {A high-throughput reconfigurable processing array for neural networks},
  booktitle    = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  organization = {IEEE},
  year         = {2017},
  pages        = {1--4}
}

@inproceedings{jiao2017accelerating,
  author       = {Jiao, Li and Luo, Cheng and Cao, Wei and Zhou, Xuegong and Wang, Lingli},
  title        = {Accelerating low bit-width convolutional neural networks with embedded {FPGA}},
  booktitle    = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages        = {1--4},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{ma2017automatic,
  author       = {Ma, Yufei and Cao, Yu and Vrudhula, Sarma and Seo, Jae-sun},
  title        = {An automatic {RTL} compiler for high-throughput {FPGA} implementation of diverse deep convolutional neural networks},
  booktitle    = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages        = {1--8},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{fan2017f,
  author       = {Fan, Hongxiang and Niu, Xinyu and Liu, Qiang and Luk, Wayne},
  title        = {{F-C3D}: {FPGA}-based 3-dimensional convolutional neural network},
  booktitle    = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages        = {1--4},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{moss2017high,
  author       = {Moss, Duncan J. M. and Nurvitadhi, Eriko and Sim, Jaewoong and Mishra, Asit and Marr, Debbie and Subhaschandra, Suchit and Leong, Philip H. W.},
  title        = {High performance binary neural networks on the {Xeon}+{FPGA}{\texttrademark} platform},
  booktitle    = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages        = {1--4},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{zhang2017high,
  author       = {Zhang, Xiaofan and Liu, Xinheng and Ramachandran, Anand and Zhuge, Chuanhao and Tang, Shibin and Ouyang, Peng and Cheng, Zuofu and Rupnow, Kyle and Chen, Deming},
  title        = {High-performance video content recognition with long-term recurrent convolutional network for {FPGA}},
  booktitle    = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages        = {1--4},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{venieris2017latency,
  author       = {Venieris, Stylianos I and Bouganis, Christos-Savvas},
  title        = {Latency-driven design for {FPGA}-based convolutional neural networks},
  booktitle    = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages        = {1--8},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{lu2017leveraging,
  author       = {Lu, Weina and Lu, Wenyan and Ye, Jing and Hu, Yu and Li, Xiaowei},
  title        = {Leveraging {FVT}-margins in design space exploration for {FPGA}-based {CNN} accelerators},
  booktitle    = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages        = {1--4},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{vestias2017parallel,
  author       = {V{\'e}stias, M{\'a}rio and Duarte, Rui Policarpo and de Sousa, Jos{\'e} T and Neto, Hor{\'a}cio},
  title        = {Parallel dot-products for deep learning on {FPGA}},
  booktitle    = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages        = {1--4},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{prost2017scalable,
  author       = {Prost-Boucle, Adrien and Bourge, Alban and P{\'e}trot, Fr{\'e}d{\'e}ric and Alemdar, Hande and Caldwell, Nicholas and Leroy, Vincent},
  title        = {Scalable high-performance architecture for convolutional ternary neural networks on {FPGA}},
  booktitle    = {Field Programmable Logic and Applications (FPL), 2017 27th International Conference on},
  pages        = {1--7},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{li2016high,
  author       = {Li, Huimin and Fan, Xitian and Jiao, Li and Cao, Wei and Zhou, Xuegong and Wang, Lingli},
  title        = {A high performance {FPGA}-based accelerator for large-scale convolutional neural networks},
  booktitle    = {Field Programmable Logic and Applications (FPL), 2016 26th International Conference on},
  pages        = {1--9},
  year         = {2016},
  organization = {IEEE}
}

@inproceedings{nurvitadhi2016accelerating1,
  author       = {Nurvitadhi, Eriko and Sim, Jaewoong and Sheffield, David and Mishra, Asit and Krishnan, Srivatsan and Marr, Debbie},
  title        = {Accelerating recurrent neural networks in analytics servers: comparison of {FPGA}, {CPU}, {GPU}, and {ASIC}},
  booktitle    = {Field Programmable Logic and Applications (FPL), 2016 26th International Conference on},
  pages        = {1--4},
  year         = {2016},
  organization = {IEEE}
}

@inproceedings{shen2016overcoming,
  author       = {Shen, Yongming and Ferdman, Michael and Milder, Peter},
  title        = {Overcoming resource underutilization in spatial {CNN} accelerators},
  booktitle    = {Field Programmable Logic and Applications (FPL), 2016 26th International Conference on},
  pages        = {1--4},
  year         = {2016},
  organization = {IEEE}
}

@inproceedings{nakahara2015deep,
  author       = {Nakahara, Hiroki and Sasao, Tsutomu},
  title        = {A deep convolutional neural network based on nested residue number system},
  booktitle    = {Field Programmable Logic and Applications (FPL), 2015 25th International Conference on},
  organization = {IEEE},
  year         = {2015},
  pages        = {1--6}
}

@inproceedings{liu2016automatic,
  author       = {Liu, Zhiqiang and Dou, Yong and Jiang, Jingfei and Xu, Jinwei},
  title        = {Automatic code generation of convolutional neural networks in {FPGA} implementation},
  booktitle    = {Field-Programmable Technology (FPT), 2016 International Conference on},
  pages        = {61--68},
  year         = {2016},
  organization = {IEEE}
}

@inproceedings{nurvitadhi2016accelerating,
  author       = {Nurvitadhi, Eriko and Sheffield, David and Sim, Jaewoong and Mishra, Asit and Venkatesh, Ganesh and Marr, Debbie},
  title        = {Accelerating Binarized Neural Networks: Comparison of {FPGA}, {CPU}, {GPU}, and {ASIC}},
  booktitle    = {Field-Programmable Technology (FPT), 2016 International Conference on},
  pages        = {77--84},
  year         = {2016},
  organization = {IEEE}
}

@inproceedings{dicecco2016caffeinated,
  author       = {DiCecco, Roberto and Lacey, Griffin and Vasiljevic, Jasmina and Chow, Paul and Taylor, Graham and Areibi, Shawki},
  title        = {Caffeinated {FPGAs}: {FPGA} Framework For Convolutional Neural Networks},
  booktitle    = {Field-Programmable Technology (FPT), 2016 International Conference on},
  pages        = {265--268},
  year         = {2016},
  organization = {IEEE}
}

@inproceedings{nakahara2016memory,
  author       = {Nakahara, Hiroki and Yonekawa, Haruyoshi and Sasao, Tsutomu and Iwamoto, Hisashi and Motomura, Masato},
  title        = {A memory-based realization of a binarized deep convolutional neural network},
  booktitle    = {Field-Programmable Technology (FPT), 2016 International Conference on},
  organization = {IEEE},
  year         = {2016},
  pages        = {277--280}
}

@inproceedings{zhang2016energy,
  author       = {Zhang, Chen and Wu, Di and Sun, Jiayu and Sun, Guangyu and Luo, Guojie and Cong, Jason},
  title        = {Energy-Efficient {CNN} Implementation on a Deeply Pipelined {FPGA} Cluster},
  booktitle    = {Proceedings of the 2016 International Symposium on Low Power Electronics and Design},
  pages        = {326--331},
  year         = {2016},
  organization = {ACM}
}

@inproceedings{wei2017automated,
  author       = {Wei, Xuechao and Yu, Cody Hao and Zhang, Peng and Chen, Youxiang and Wang, Yuxin and Hu, Han and Liang, Yun and Cong, Jason},
  title        = {Automated Systolic Array Architecture Synthesis for High Throughput {CNN} Inference on {FPGAs}},
  booktitle    = {Proceedings of the 54th Annual Design Automation Conference 2017},
  pages        = {29},
  year         = {2017},
  organization = {ACM}
}

@inproceedings{xiao2017exploring,
  author       = {Xiao, Qingcheng and Liang, Yun and Lu, Liqiang and Yan, Shengen and Tai, Yu-Wing},
  title        = {Exploring Heterogeneous Algorithms for Accelerating Deep Convolutional Neural Networks on {FPGAs}},
  booktitle    = {Proceedings of the 54th Annual Design Automation Conference 2017},
  pages        = {62},
  year         = {2017},
  organization = {ACM}
}

@inproceedings{wang2016deepburning,
  author       = {Wang, Ying and Xu, Jie and Han, Yinhe and Li, Huawei and Li, Xiaowei},
  title        = {{DeepBurning}: automatic generation of {FPGA}-based learning accelerators for the neural network family},
  booktitle    = {Design Automation Conference (DAC), 2016 53rd ACM/EDAC/IEEE},
  pages        = {1--6},
  year         = {2016},
  organization = {IEEE}
}

@inproceedings{motamedi2016design,
  author       = {Motamedi, Mohammad and Gysel, Philipp and Akella, Venkatesh and Ghiasi, Soheil},
  title        = {Design space exploration of {FPGA}-based deep convolutional neural networks},
  booktitle    = {Design Automation Conference (ASP-DAC), 2016 21st Asia and South Pacific},
  pages        = {575--580},
  year         = {2016},
  organization = {IEEE}
}

@inproceedings{guan2017fpga,
  author       = {Guan, Yijin and Yuan, Zhihang and Sun, Guangyu and Cong, Jason},
  title        = {{FPGA}-based accelerator for long short-term memory recurrent neural networks},
  booktitle    = {Design Automation Conference (ASP-DAC), 2017 22nd Asia and South Pacific},
  pages        = {629--634},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{gokhale2014240,
  author    = {Gokhale, Vinayak and Jin, Jonghoon and Dundar, Aysegul and Martini, Berin and Culurciello, Eugenio},
  title     = {A 240 {G-ops/s} mobile coprocessor for deep neural networks},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops},
  pages     = {682--687},
  year      = {2014}
}

@inproceedings{ding2017c,
  author       = {Ding, Caiwen and Liao, Siyu and Wang, Yanzhi and Li, Zhe and Liu, Ning and Zhuo, Youwei and Wang, Chao and Qian, Xuehai and Bai, Yu and Yuan, Geng and others},
  title        = {{CirCNN}: accelerating and compressing deep neural networks using block-circulant weight matrices},
  booktitle    = {Proceedings of the 50th Annual IEEE/ACM International Symposium on Microarchitecture},
  pages        = {395--408},
  year         = {2017},
  organization = {ACM}
}

@inproceedings{sharma2016high,
  author       = {Sharma, Hardik and Park, Jongse and Mahajan, Divya and Amaro, Emmanuel and Kim, Joon Kyung and Shao, Chenkai and Mishra, Asit and Esmaeilzadeh, Hadi},
  title        = {From high-level deep neural models to {FPGAs}},
  booktitle    = {Microarchitecture (MICRO), 2016 49th Annual IEEE/ACM International Symposium on},
  pages        = {1--12},
  year         = {2016},
  organization = {IEEE}
}

@inproceedings{alwani2016fused,
  author       = {Alwani, Manoj and Chen, Han and Ferdman, Michael and Milder, Peter},
  title        = {Fused-layer {CNN} accelerators},
  booktitle    = {Microarchitecture (MICRO), 2016 49th Annual IEEE/ACM International Symposium on},
  pages        = {1--12},
  year         = {2016},
  organization = {IEEE}
}

@inproceedings{nguyen2017double,
  author       = {Nguyen, Dong and Kim, Daewoo and Lee, Jongeun},
  title        = {Double {MAC}: Doubling the performance of convolutional neural networks on modern {FPGAs}},
  booktitle    = {2017 Design, Automation \& Test in Europe Conference \& Exhibition (DATE)},
  pages        = {890--893},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{rahman2017design,
  author       = {Rahman, Atul and Oh, Sangyun and Lee, Jongeun and Choi, Kiyoung},
  title        = {Design space exploration of {FPGA} accelerators for convolutional neural networks},
  booktitle    = {2017 Design, Automation \& Test in Europe Conference \& Exhibition (DATE)},
  pages        = {1147--1152},
  year         = {2017},
  organization = {IEEE}
}

@inproceedings{shreejith2016accelerated,
  author       = {Shreejith, Shanker and Anshuman, Bezborah and Fahmy, Suhaib A},
  title        = {Accelerated artificial neural networks on {FPGA} for fault detection in automotive systems},
  booktitle    = {Design, Automation \& Test in Europe Conference \& Exhibition (DATE), 2016},
  pages        = {37--42},
  year         = {2016},
  organization = {IEEE}
}

@inproceedings{rahman2016efficient,
  author       = {Rahman, Atul and Lee, Jongeun and Choi, Kiyoung},
  title        = {Efficient {FPGA} acceleration of convolutional neural networks using logical-{3D} compute array},
  booktitle    = {Design, Automation \& Test in Europe Conference \& Exhibition (DATE), 2016},
  pages        = {1393--1398},
  year         = {2016},
  organization = {IEEE}
}

@inproceedings{zhang2016caffeine,
  author       = {Zhang, Chen and Fang, Zhenman and Zhou, Peipei and Pan, Peichen and Cong, Jason},
  title        = {Caffeine: Towards uniformed representation and acceleration for deep convolutional neural networks},
  booktitle    = {Computer-Aided Design (ICCAD), 2016 IEEE/ACM International Conference on},
  organization = {IEEE},
  year         = {2016},
  pages        = {1--8}
}

@inproceedings{krizhevsky2012imagenet,
  author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
  title     = {{ImageNet} classification with deep convolutional neural networks},
  booktitle = {Advances in neural information processing systems},
  pages     = {1097--1105},
  year      = {2012}
}

@article{ILSVRC15,
  author  = {Russakovsky, Olga and Deng, Jia and Su, Hao and Krause, Jonathan and Satheesh, Sanjeev and Ma, Sean and Huang, Zhiheng and Karpathy, Andrej and Khosla, Aditya and Bernstein, Michael and Berg, Alexander C. and Fei-Fei, Li},
  title   = {{ImageNet} Large Scale Visual Recognition Challenge},
  journal = {International Journal of Computer Vision (IJCV)},
  volume  = {115},
  number  = {3},
  pages   = {211--252},
  year    = {2015},
  doi     = {10.1007/s11263-015-0816-y}
}

@inproceedings{girshick2014rich,
  author    = {Girshick, Ross and Donahue, Jeff and Darrell, Trevor and Malik, Jitendra},
  title     = {Rich feature hierarchies for accurate object detection and semantic segmentation},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2014},
  pages     = {580--587}
}

@article{hannun2014deep,
  author  = {Hannun, Awni and Case, Carl and Casper, Jared and Catanzaro, Bryan and Diamos, Greg and Elsen, Erich and Prenger, Ryan and Satheesh, Sanjeev and Sengupta, Shubho and Coates, Adam and others},
  title   = {Deep speech: Scaling up end-to-end speech recognition},
  year    = {2014},
  journal = {arXiv preprint arXiv:1412.5567}
}

@article{simonyan2014very,
  author  = {Simonyan, Karen and Zisserman, Andrew},
  title   = {Very deep convolutional networks for large-scale image recognition},
  year    = {2014},
  journal = {arXiv preprint arXiv:1409.1556}
}

@article{jia2014caffe,
  author  = {Jia, Yangqing and Shelhamer, Evan and Donahue, Jeff and Karayev, Sergey and Long, Jonathan and Girshick, Ross and Guadarrama, Sergio and Darrell, Trevor},
  title   = {Caffe: Convolutional Architecture for Fast Feature Embedding},
  journal = {arXiv preprint arXiv:1408.5093},
  year    = {2014}
}

@article{abadi2016tensorflow,
  author  = {Abadi, Mart{\'\i}n and Agarwal, Ashish and Barham, Paul and Brevdo, Eugene and Chen, Zhifeng and Citro, Craig and Corrado, Greg S and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and others},
  title   = {{TensorFlow}: Large-scale machine learning on heterogeneous distributed systems},
  journal = {arXiv preprint arXiv:1603.04467},
  year    = {2016}
}

@article{xu2015empirical,
  author  = {Xu, Bing and Wang, Naiyan and Chen, Tianqi and Li, Mu},
  title   = {Empirical evaluation of rectified activations in convolutional network},
  year    = {2015},
  journal = {arXiv preprint arXiv:1505.00853}
}

@inproceedings{he2016deep,
  author    = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
  title     = {Deep residual learning for image recognition},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  year      = {2016},
  pages     = {770--778}
}

@article{iandola2016squeezenet,
  author  = {Iandola, Forrest N and Han, Song and Moskewicz, Matthew W and Ashraf, Khalid and Dally, William J and Keutzer, Kurt},
  title   = {{SqueezeNet}: {AlexNet}-level accuracy with 50x fewer parameters and {$<$}0.5 {MB} model size},
  journal = {arXiv preprint arXiv:1602.07360},
  year    = {2016}
}

@article{guo2017angel,
  author    = {Guo, Kaiyuan and Sui, Lingzhi and Qiu, Jiantao and Yu, Jincheng and Wang, Junbin and Yao, Song and Han, Song and Wang, Yu and Yang, Huazhong},
  title     = {{Angel-Eye}: A Complete Design Flow for Mapping {CNN} onto Embedded {FPGA}},
  journal   = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
  year      = {2017},
  publisher = {IEEE}
}

@article{han2015deep,
  author  = {Han, Song and Mao, Huizi and Dally, William J},
  title   = {Deep compression: Compressing deep neural networks with pruning, trained quantization and {Huffman} coding},
  journal = {arXiv preprint arXiv:1510.00149},
  year    = {2015}
}

@inproceedings{chen2015compressing,
  author    = {Chen, Wenlin and Wilson, James and Tyree, Stephen and Weinberger, Kilian and Chen, Yixin},
  title     = {Compressing neural networks with the hashing trick},
  booktitle = {International Conference on Machine Learning},
  year      = {2015},
  pages     = {2285--2294}
}

@article{zhu2016trained,
  author  = {Zhu, Chenzhuo and Han, Song and Mao, Huizi and Dally, William J},
  title   = {Trained ternary quantization},
  year    = {2016},
  journal = {arXiv preprint arXiv:1612.01064}
}

@article{li2016ternary,
  author  = {Li, Fengfu and Zhang, Bo and Liu, Bin},
  title   = {Ternary weight networks},
  year    = {2016},
  journal = {arXiv preprint arXiv:1605.04711}
}

@article{zhou2016dorefa,
  author  = {Zhou, Shuchang and Wu, Yuxin and Ni, Zekun and Zhou, Xinyu and Wen, He and Zou, Yuheng},
  title   = {{DoReFa-Net}: Training low bitwidth convolutional neural networks with low bitwidth gradients},
  journal = {arXiv preprint arXiv:1606.06160},
  year    = {2016}
}

@inproceedings{zhang2015efficient,
  author    = {Zhang, Xiangyu and Zou, Jianhua and Ming, Xiang and He, Kaiming and Sun, Jian},
  title     = {Efficient and accurate approximations of nonlinear convolutional networks},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  year      = {2015},
  pages     = {1984--1992}
}

@inproceedings{liu2015sparse,
  author    = {Liu, Baoyuan and Wang, Min and Foroosh, Hassan and Tappen, Marshall and Pensky, Marianna},
  title     = {Sparse convolutional neural networks},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  year      = {2015},
  pages     = {806--814}
}

@book{winograd1980arithmetic,
  author    = {Winograd, Shmuel},
  title     = {Arithmetic complexity of computations},
  volume    = {33},
  year      = {1980},
  publisher = {SIAM}
}

@misc{altera_dsp,
  key          = {Altera},
  howpublished = {\url{https://www.altera.com/products/fpga/stratix-series/stratix-10/features.html}},
  note         = {Accessed Dec 7, 2017}
}

@misc{xilinx_dsp,
  key          = {Xilinx},
  howpublished = {\url{https://www.xilinx.com/support/documentation/user_guides/ug579-ultrascale-dsp.pdf}},
  note         = {Accessed Dec 7, 2017}
}

@misc{vlsi_energy,
  author       = {Horowitz, M.},
  title        = {Energy table for 45nm process, {Stanford} {VLSI} wiki},
  howpublished = {\url{https://sites.google.com/site/seecproject}},
  note         = {Online}
}

@inproceedings{szegedy2015going,
  author    = {Szegedy, Christian and Liu, Wei and Jia, Yangqing and Sermanet, Pierre and Reed, Scott and Anguelov, Dragomir and Erhan, Dumitru and Vanhoucke, Vincent and Rabinovich, Andrew and others},
  title     = {Going deeper with convolutions},
  booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
  pages     = {1--9},
  year      = {2015}
}

@inproceedings{gupta2016accelerating,
  author    = {Gupta, P. K.},
  title     = {Accelerating datacenter workloads},
  booktitle = {26th International Conference on Field Programmable Logic and Applications ({FPL})},
  year      = {2016}
}

@article{Howard2017MobileNets,
  author  = {Howard, Andrew G and Zhu, Menglong and Chen, Bo and Kalenichenko, Dmitry and Wang, Weijun and Weyand, Tobias and Andreetto, Marco and Adam, Hartwig},
  title   = {{MobileNets}: Efficient Convolutional Neural Networks for Mobile Vision Applications},
  journal = {arXiv preprint arXiv:1704.04861},
  year    = {2017}
}

@inproceedings{Moss2018A,
  author       = {Moss, Duncan J. M. and Leong, Philip H. W. and Krishnan, Srivatsan and Nurvitadhi, Eriko and Ratuszniak, Piotr and Johnson, Chris and Sim, Jaewoong and Mishra, Asit and Marr, Debbie and Subhaschandra, Suchit},
  title        = {A Customizable Matrix Multiplication Framework for the {Intel} {HARPv2} {Xeon}+{FPGA} Platform: A Deep Learning Case Study},
  booktitle    = {Proceedings of the 2018 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages        = {107--116},
  year         = {2018},
  organization = {ACM}
}

@inproceedings{Shen2018Towards,
  author       = {Shen, Junzhong and Huang, You and Wang, Zelong and Qiao, Yuran and Wen, Mei and Zhang, Chunyuan},
  title        = {Towards a Uniform Template-based Architecture for Accelerating {2D} and {3D} {CNNs} on {FPGA}},
  booktitle    = {Proceedings of the 2018 ACM/SIGDA International Symposium on Field-Programmable Gate Arrays},
  pages        = {97--106},
  year         = {2018},
  organization = {ACM}
}

@inproceedings{Yu2017Instruction,
  author    = {Yu, Jincheng and Hu, Yiming and Ning, Xuefei and Qiu, Jiantao and Guo, Kaiyuan and Wang, Yu and Yang, Huazhong},
  title     = {Instruction driven cross-layer {CNN} accelerator with {Winograd} transformation on {FPGA}},
  booktitle = {International Conference on Field Programmable Technology},
  pages     = {227--230},
  year      = {2017}
}

@inproceedings{mao2017exploring,
  author    = {Mao, Huizi and Han, Song and Pool, Jeff and Li, Wenshuo and Liu, Xingyu and Wang, Yu and Dally, William J.},
  title     = {Exploring the Granularity of Sparsity in Convolutional Neural Networks},
  booktitle = {Computer Vision and Pattern Recognition Workshops},
  pages     = {1927--1934},
  year      = {2017}
}

@misc{chai_dnn,
  key          = {Xilinx},
  howpublished = {\url{https://github.com/Xilinx/chaidnn}},
  note         = {Accessed August 23, 2018}
}

@inproceedings{liu2016ssd,
  author       = {Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and Berg, Alexander C},
  title        = {{SSD}: Single shot multibox detector},
  booktitle    = {European conference on computer vision},
  pages        = {21--37},
  year         = {2016},
  organization = {Springer}
}

@inproceedings{yang2018fully,
  author       = {Yang, Li and He, Zhezhi and Fan, Deliang},
  title        = {A Fully Onchip Binarized Convolutional Neural Network {FPGA} Impelmentation with Accurate Inference},
  booktitle    = {Proceedings of the International Symposium on Low Power Electronics and Design},
  pages        = {50},
  year         = {2018},
  organization = {ACM}
}

@inproceedings{lin2018lcp,
  author       = {Lin, Xinhan and Yin, Shouyi and Tu, Fengbin and Liu, Leibo and Li, Xiangyu and Wei, Shaojun},
  title        = {{LCP}: a layer clusters paralleling mapping method for accelerating inception and residual networks on {FPGA}},
  booktitle    = {Proceedings of the 55th Annual Design Automation Conference},
  pages        = {16},
  year         = {2018},
  organization = {ACM}
}

@article{ghasemzadehrebnet,
  author  = {Ghasemzadeh, Mohammad and Samragh, Mohammad and Koushanfar, Farinaz},
  title   = {{ReBNet}: Residual Binarized Neural Network},
  journal = {arXiv preprint arXiv:1711.01243},
  year    = {2017}
}

@inproceedings{RN169,
  author    = {Putnam, Andrew},
  title     = {Large-scale reconfigurable computing in a {Microsoft} datacenter},
  booktitle = {Hot Chips 26 Symposium (HCS), 2014 IEEE},
  publisher = {IEEE},
  pages     = {1--38},
  year      = {2014},
  ISBN      = {1467388831},
  type      = {Conference Proceedings}
}

@article{Shelhamer2017Fully,
  author  = {Shelhamer, Evan and Long, Jonathan and Darrell, Trevor},
  title   = {Fully Convolutional Networks for Semantic Segmentation},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {39},
  number  = {4},
  pages   = {640--651},
  year    = {2017}
}

@inproceedings{han2016eie,
  title={{EIE}: efficient inference engine on compressed deep neural network},
  author={Han, Song and Liu, Xingyu and Mao, Huizi and Pu, Jing and Pedram, Ardavan and Horowitz, Mark A and Dally, William J},
  booktitle={Proceedings of the 43rd International Symposium on Computer Architecture},
  pages={243--254},
  year={2016},
  organization={IEEE Press}
}

@inproceedings{tang2017train,
  author    = {Tang, Wei and Hua, Gang and Wang, Liang},
  title     = {How to train a compact binary neural network with high accuracy?},
  booktitle = {AAAI},
  year      = {2017},
  pages     = {2625--2631},
}

@article{hubara2017quantized,
  title={Quantized Neural Networks: Training Neural Networks with Low Precision Weights and Activations},
  author={Hubara, Itay and Courbariaux, Matthieu and Soudry, Daniel and El-Yaniv, Ran and Bengio, Yoshua},
  journal={Journal of Machine Learning Research},
  volume={18},
  number={187},
  pages={1--30},
  year={2017}
}

@inproceedings{hubara2016binarized,
  title={Binarized neural networks},
  author={Hubara, Itay and Courbariaux, Matthieu and Soudry, Daniel and El-Yaniv, Ran and Bengio, Yoshua},
  booktitle={Advances in Neural Information Processing Systems},
  pages={4107--4115},
  year={2016}
}

@inproceedings{han2015learning,
  author    = {Han, Song and Pool, Jeff and Tran, John and Dally, William},
  title     = {Learning both Weights and Connections for Efficient Neural Network},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2015},
  pages     = {1135--1143},
}

@inproceedings{zhou2016less,
  title={Less is more: Towards compact {CNNs}},
  author={Zhou, Hao and Alvarez, Jose M and Porikli, Fatih},
  booktitle={European Conference on Computer Vision},
  pages={662--677},
  year={2016},
  organization={Springer}
}

@inproceedings{lebedev2016fast,
  author    = {Lebedev, Vadim and Lempitsky, Victor},
  title     = {Fast convnets using group-wise brain damage},
  booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  year      = {2016},
  pages     = {2554--2564},
}

@inproceedings{wen2016learning,
  author    = {Wen, Wei and Wu, Chunpeng and Wang, Yandan and Chen, Yiran and Li, Hai},
  title     = {Learning structured sparsity in deep neural networks},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2016},
  pages     = {2074--2082},
}

@article{molchanov2016pruning,
  author  = {Molchanov, Pavlo and Tyree, Stephen and Karras, Tero and Aila, Timo and Kautz, Jan},
  title   = {Pruning convolutional neural networks for resource efficient transfer learning},
  journal = {arXiv preprint arXiv:1611.06440},
  year    = {2016},
}

@article{li2016pruning,
  author  = {Li, Hao and Kadav, Asim and Durdanovic, Igor and Samet, Hanan and Graf, Hans Peter},
  title   = {Pruning filters for efficient convnets},
  journal = {arXiv preprint arXiv:1608.08710},
  year    = {2016},
}

@inproceedings{du2015shidiannao,
  title         ={{ShiDianNao}: shifting vision processing closer to the sensor},
  author        ={Du, Zidong and Fasthuber, Robert and Chen, Tianshi and others},
  booktitle     ={ISCA},
  pages         ={92--104},
  year          ={2015},
  organization  ={ACM}
}

@InProceedings{chen2016eyeriss,
  author       = {Chen, Yu-Hsin and Krishna, Tushar and Emer, Joel and Sze, Vivienne},
  title        = {Eyeriss: An Energy-Efficient Reconfigurable Accelerator for Deep Convolutional Neural Networks},
  booktitle    = {ISSCC},
  year         = {2016},
  organization = {IEEE},
}

@inproceedings{albericio2016cnvlutin,
  author       = {Albericio, Jorge and Judd, Patrick and Hetherington, Tayler and Aamodt, Tor and Jerger, Natalie Enright and Moshovos, Andreas},
  title        = {Cnvlutin: ineffectual-neuron-free deep neural network computing},
  booktitle    = {Computer Architecture (ISCA), 2016 ACM/IEEE 43rd Annual International Symposium on},
  year         = {2016},
  pages        = {1--13},
  organization = {IEEE},
}

% Second copy of chen2016eyeriss removed: it repeated the entry defined earlier in this file, and BibTeX rejects repeated keys.

@inproceedings{zhang2016cambricon,
  title={{Cambricon-X}: An accelerator for sparse neural networks},
  author={Zhang, Shijin and Du, Zidong and Zhang, Lei and Lan, Huiying and Liu, Shaoli and Li, Ling and Guo, Qi and Chen, Tianshi and Chen, Yunji},
  booktitle={The 49th Annual IEEE/ACM International Symposium on Microarchitecture},
  pages={20},
  year={2016},
  organization={IEEE Press}
}

@inproceedings{zhao2016f,
  title={{F-CNN}: An {FPGA}-based framework for training convolutional neural networks},
  author={Zhao, Wenlai and Fu, Haohuan and Luk, Wayne and Yu, Teng and Wang, Shaojun and Feng, Bo and Ma, Yuchun and Yang, Guangwen},
  booktitle={Application-specific Systems, Architectures and Processors (ASAP), 2016 IEEE 27th International Conference on},
  pages={107--114},
  year={2016},
  organization={IEEE}
}

@inproceedings{liu2017fpga,
  title={An {FPGA}-based processor for training convolutional neural networks},
  author={Liu, Zhiqiang and Dou, Yong and Jiang, Jingfei and Wang, Qiang and Chow, Paul},
  booktitle={Field Programmable Technology (ICFPT), 2017 International Conference on},
  pages={207--210},
  year={2017},
  organization={IEEE}
}

@inproceedings{geng2018fpdeep,
  title={{FPDeep}: Acceleration and Load Balancing of {CNN} Training on {FPGA} Clusters},
  author={Geng, Tong and Wang, Tianqi and Sanaullah, Ahmed and Yang, Chen and Xu, Rui and Patel, Rushi and Herbordt, Martin},
  booktitle={2018 IEEE 26th Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM)},
  pages={81--84},
  year={2018},
  organization={IEEE}
}

@inproceedings{bulucc2009parallel,
  author       = {Bulu{\c{c}}, Aydin and Fineman, Jeremy T and Frigo, Matteo and Gilbert, John R and Leiserson, Charles E},
  title        = {Parallel sparse matrix-vector and matrix-transpose-vector multiplication using compressed sparse blocks},
  booktitle    = {Proceedings of the twenty-first annual symposium on Parallelism in algorithms and architectures},
  year         = {2009},
  pages        = {233--244},
  organization = {ACM},
}