-
Notifications
You must be signed in to change notification settings - Fork 21
/
paper.bib
153 lines (134 loc) · 5.89 KB
/
paper.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
% Sutton and Barto textbook. Typed as a book so the publisher is rendered;
% the volume number and publisher city now live in their own fields.
@book{sutton:1998,
  author    = {Sutton, Richard S. and Barto, Andrew G.},
  title     = {Introduction to Reinforcement Learning},
  volume    = {135},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1998},
}
% OpenAI Baselines source repository. This is software, not a journal article,
% so it is typed as misc (an article entry requires a journal field).
@misc{dhariwal:2017,
  author       = {Dhariwal, Prafulla and Hesse, Christopher and Klimov, Oleg and Nichol, Alex and Plappert, Matthias and Radford, Alec and Schulman, John and Sidor, Szymon and Wu, Yuhuai and Zhokhov, Peter},
  title        = {{OpenAI} Baselines},
  howpublished = {GitHub repository},
  url          = {https://github.com/openai/baselines},
  year         = {2017},
}
% Tensorforce source repository (software, hence misc).
% The URL previously pointed at the TensorFlow repository instead of Tensorforce.
@misc{schaarschmidt:2017,
  author       = {Schaarschmidt, Michael and Kuhnle, Alexander and Fricke, Kai},
  title        = {{Tensorforce}: A {TensorFlow} Library for Applied Reinforcement Learning},
  howpublished = {GitHub repository},
  url          = {https://github.com/tensorforce/tensorforce},
  year         = {2017},
}
% Ray RLlib arXiv preprint. The arXiv identifier is stored in the eprint fields
% (and mirrored in url for styles that ignore eprint) instead of the journal field.
@misc{liang:2017,
  author        = {Liang, Eric and Liaw, Richard and Nishihara, Robert and Moritz, Philipp and Fox, Roy and Gonzalez, Joseph and Goldberg, Ken and Stoica, Ion},
  title         = {{Ray RLlib}: A Composable and Scalable Reinforcement Learning Library},
  year          = {2017},
  eprint        = {1712.09381},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1712.09381},
}
% Reinforcement Learning Coach software release, identified by its Zenodo DOI.
% The month comes from the "(Dec. 2017)" that used to be embedded in the title;
% the former volume value was only the numeric tail of the DOI and is dropped.
@misc{caspi:2017,
  author       = {Caspi, Itai and Leibovich, Gal and Novik, Gal},
  title        = {{Reinforcement Learning Coach}},
  howpublished = {Zenodo},
  month        = dec,
  year         = {2017},
  doi          = {10.5281/zenodo.1134899},
  url          = {https://doi.org/10.5281/zenodo.1134899},
}
% TensorFlow system paper (OSDI 2016).
% The booktitle had an unbalanced closing parenthesis after "OSDI 16".
@inproceedings{abadi:2016,
  author    = {Abadi, Mart{\'\i}n and Barham, Paul and Chen, Jianmin and Chen, Zhifeng and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Irving, Geoffrey and Isard, Michael and others},
  title     = {{TensorFlow}: A System for Large-Scale Machine Learning},
  booktitle = {12th {USENIX} Symposium on Operating Systems Design and Implementation ({OSDI} 16)},
  pages     = {265--283},
  year      = {2016},
}
% OpenAI Gym arXiv preprint. The arXiv identifier is stored in the eprint fields
% (and mirrored in url for styles that ignore eprint) instead of the journal field.
@misc{brockman:2016,
  author        = {Brockman, Greg and Cheung, Vicki and Pettersson, Ludwig and Schneider, Jonas and Schulman, John and Tang, Jie and Zaremba, Wojciech},
  title         = {{OpenAI Gym}},
  year          = {2016},
  eprint        = {1606.01540},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1606.01540},
}
% keras-rl source repository. The hosting site is recorded in howpublished
% (journal is not a misc field) and the month uses the standard macro.
@misc{kerasrl:2019,
  author       = {Plappert, Matthias},
  title        = {keras-rl/keras-rl},
  howpublished = {GitHub},
  url          = {https://github.com/keras-rl/keras-rl},
  month        = mar,
  year         = {2019},
}
% Dopamine arXiv preprint. The arXiv identifier is stored in the eprint fields
% (and mirrored in url for styles that ignore eprint) instead of the journal field.
@misc{castro:2018,
  author        = {Castro, Pablo Samuel and Moitra, Subhodeep and Gelada, Carles and Kumar, Saurabh and Bellemare, Marc G.},
  title         = {{Dopamine}: A Research Framework for Deep Reinforcement Learning},
  year          = {2018},
  eprint        = {1812.06110},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1812.06110},
}
% Deep Q-learning on Atari, arXiv preprint. The arXiv identifier is stored in the
% eprint fields (and mirrored in url) instead of the journal field.
@misc{mnih:2013,
  author        = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Graves, Alex and Antonoglou, Ioannis and Wierstra, Daan and Riedmiller, Martin},
  title         = {Playing {Atari} with Deep Reinforcement Learning},
  year          = {2013},
  eprint        = {1312.5602},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1312.5602},
}
% Parameter-space-noise arXiv preprint. The arXiv identifier is stored in the
% eprint fields (and mirrored in url) instead of the journal field.
@misc{plappert:2017,
  author        = {Plappert, Matthias and Houthooft, Rein and Dhariwal, Prafulla and Sidor, Szymon and Chen, Richard Y. and Chen, Xi and Asfour, Tamim and Abbeel, Pieter and Andrychowicz, Marcin},
  title         = {Parameter Space Noise for Exploration},
  year          = {2017},
  eprint        = {1706.01905},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1706.01905},
}
% Policy-gradient theorem paper. All names use the "Last, First" form, and the
% umlaut is written as a BibTeX special character so sorting and labels work.
@incollection{sutton2000,
  author    = {Sutton, Richard S. and McAllester, David A. and Singh, Satinder P. and Mansour, Yishay},
  title     = {Policy Gradient Methods for Reinforcement Learning with Function Approximation},
  booktitle = {Advances in Neural Information Processing Systems 12},
  editor    = {Solla, S. A. and Leen, T. K. and M{\"u}ller, K.},
  pages     = {1057--1063},
  publisher = {MIT Press},
  year      = {2000},
}
% Deterministic policy gradient paper (ICML 2014, PMLR volume 32).
% The month uses the standard macro; the conference days that were packed into
% the month field are kept in eventdate. Address spelling corrected to Beijing.
@inproceedings{silver2014deterministic,
  author    = {Silver, David and Lever, Guy and Heess, Nicolas and Degris, Thomas and Wierstra, Daan and Riedmiller, Martin},
  title     = {Deterministic Policy Gradient Algorithms},
  booktitle = {Proceedings of the 31st International Conference on Machine Learning},
  editor    = {Xing, Eric P. and Jebara, Tony},
  series    = {Proceedings of Machine Learning Research},
  volume    = {32},
  number    = {1},
  pages     = {387--395},
  publisher = {PMLR},
  address   = {Beijing, China},
  month     = jun,
  year      = {2014},
  eventdate = {2014-06-22/2014-06-24},
  url       = {http://proceedings.mlr.press/v32/silver14.html},
}
% PPO arXiv preprint. The arXiv identifier is stored in the eprint fields
% (and mirrored in url for styles that ignore eprint) instead of the journal field.
@misc{schulman:2017,
  author        = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  title         = {Proximal Policy Optimization Algorithms},
  year          = {2017},
  eprint        = {1707.06347},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1707.06347},
}
% Habitat arXiv preprint. The arXiv identifier is stored in the eprint fields
% (and mirrored in url) instead of the journal field; the acronym AI is protected.
@misc{savva:2019,
  author        = {Savva, Manolis and Kadian, Abhishek and Maksymets, Oleksandr and Zhao, Yili and Wijmans, Erik and Jain, Bhavana and Straub, Julian and Liu, Jia and Koltun, Vladlen and Malik, Jitendra and others},
  title         = {{Habitat}: A Platform for Embodied {AI} Research},
  year          = {2019},
  eprint        = {1904.01201},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1904.01201},
}
% Bayesian optimization in AlphaGo, arXiv preprint. The arXiv identifier is stored
% in the eprint fields (and mirrored in url) instead of the journal field.
@misc{chen:2018,
  author        = {Chen, Yutian and Huang, Aja and Wang, Ziyu and Antonoglou, Ioannis and Schrittwieser, Julian and Silver, David and de Freitas, Nando},
  title         = {{Bayesian} Optimization in {AlphaGo}},
  year          = {2018},
  eprint        = {1812.06855},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/1812.06855},
}
% World-models paper by Ha and Schmidhuber, cited as a conference paper.
@inproceedings{ha:2018,
  author    = {Ha, David and Schmidhuber, J{\"u}rgen},
  title     = {Recurrent world models facilitate policy evolution},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2018},
  pages     = {2450--2462},
}
% TF-Agents source repository. Authors are separated with "and" (commas made
% BibTeX parse the whole list as one malformed name), and the literal double
% quotes that were part of the URL value are removed.
@misc{TFAgents,
  author       = {Guadarrama, Sergio and Korattikara, Anoop and Ramirez, Oscar and Castro, Pablo and Holly, Ethan and Fishman, Sam and Wang, Ke and Gonina, Ekaterina and Wu, Neal and Harris, Chris and Vanhoucke, Vincent and Brevdo, Eugene},
  title        = {{TF-Agents}: A Library for Reinforcement Learning in {TensorFlow}},
  howpublished = {GitHub repository},
  url          = {https://github.com/tensorflow/agents},
  year         = {2018},
}
% Stable Baselines source repository. The month uses the standard macro.
% NOTE(review): the author field holds a surname only; confirm the full author
% list against the citation recommended by the upstream repository.
@misc{hill:2019,
  author       = {Hill},
  title        = {hill-a/stable-baselines},
  howpublished = {GitHub repository},
  url          = {https://github.com/hill-a/stable-baselines},
  month        = jun,
  year         = {2019},
}
% PyTorch autodiff workshop paper. The product name and the venue acronym are
% brace-protected so styles that recase titles keep their capitalisation.
@inproceedings{paszke2017automatic,
  author    = {Paszke, Adam and Gross, Sam and Chintala, Soumith and Chanan, Gregory and Yang, Edward and DeVito, Zachary and Lin, Zeming and Desmaison, Alban and Antiga, Luca and Lerer, Adam},
  title     = {Automatic Differentiation in {PyTorch}},
  booktitle = {{NIPS} Autodiff Workshop},
  year      = {2017},
}