-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathreferences.bib
481 lines (463 loc) · 17.1 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
---
---
@article{Hall1968,
  author    = {Hall, Warren A. and Butcher, William S.},
  title     = {Optimal Timing of Irrigation},
  journal   = {Journal of the Irrigation and Drainage Division},
  volume    = {94},
  number    = {2},
  pages     = {267--275},
  year      = {1968},
  month     = jun,
  publisher = {American Society of Civil Engineers (ASCE)},
  issn      = {2690-3296},
  doi       = {10.1061/jrcea4.0000569},
  url       = {http://dx.doi.org/10.1061/JRCEA4.0000569}
}
@techreport{kraft1988,
  author      = {Kraft, Dieter},
  title       = {A software package for sequential quadratic programming},
  institution = {Deutsche Forschungs- und Versuchsanstalt f{\"u}r Luft- und Raumfahrt (DFVLR)},
  type        = {Forschungsbericht},
  number      = {DFVLR-FB 88-28},
  address     = {K{\"o}ln, Germany},
  year        = {1988}
}
@article{holland1992genetic,
  author  = {Holland, John H.},
  title   = {Genetic algorithms},
  journal = {Scientific American},
  volume  = {267},
  number  = {1},
  pages   = {66--73},
  year    = {1992}
}
@article{kirkpatrick1983optimization,
  author    = {Kirkpatrick, Scott and Gelatt, Jr., C. Daniel and Vecchi, Mario P.},
  title     = {Optimization by simulated annealing},
  journal   = {Science},
  volume    = {220},
  number    = {4598},
  pages     = {671--680},
  year      = {1983},
  publisher = {American Association for the Advancement of Science}
}
@inproceedings{kennedy1995particle,
  author       = {Kennedy, James and Eberhart, Russell},
  title        = {Particle swarm optimization},
  booktitle    = {Proceedings of ICNN'95-International Conference on Neural Networks},
  volume       = {4},
  pages        = {1942--1948},
  organization = {IEEE},
  year         = {1995}
}
@article{Iskhakov2020,
  author    = {Iskhakov, Fedor and Rust, John and Schjerning, Bertel},
  title     = {Machine learning and structural econometrics: contrasts and synergies},
  journal   = {The Econometrics Journal},
  volume    = {23},
  number    = {3},
  pages     = {S81--S124},
  year      = {2020},
  month     = aug,
  publisher = {Oxford University Press (OUP)},
  issn      = {1368-423X},
  doi       = {10.1093/ectj/utaa019},
  url       = {http://dx.doi.org/10.1093/ectj/utaa019}
}
@book{ortega_rheinboldt_1970,
  author    = {Ortega, James M. and Rheinboldt, Werner C.},
  title     = {Iterative Solution of Nonlinear Equations in Several Variables},
  series    = {Computer Science and Applied Mathematics},
  publisher = {Academic Press},
  address   = {New York},
  year      = {1970}
}
@book{allgower_georg_1990,
  author    = {Allgower, Eugene L. and Georg, Kurt},
  title     = {Numerical Continuation Methods: An Introduction},
  series    = {Springer Series in Computational Mathematics},
  volume    = {13},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  year      = {1990}
}
@book{arrow1958studies,
  author    = {Arrow, Kenneth J. and Hurwicz, Leonid and Uzawa, Hirofumi},
  title     = {Studies in Linear and Non-Linear Programming},
  publisher = {Stanford University Press},
  address   = {Stanford, CA},
  year      = {1958}
}
@inproceedings{goodfellow2014generative,
  author    = {Goodfellow, Ian and Pouget-Abadie, Jean and Mirza, Mehdi and Xu, Bing and Warde-Farley, David and Ozair, Sherjil and Courville, Aaron and Bengio, Yoshua},
  title     = {Generative adversarial nets},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {27},
  pages     = {2672--2680},
  year      = {2014}
}
@book{pontryagin1962mathematical,
  author    = {Pontryagin, Lev Semyonovich and Boltyanskii, Vladimir Grigor'evich and Gamkrelidze, Revaz Valerianovich and Mishchenko, Evgenii Frolovich},
  title     = {The Mathematical Theory of Optimal Processes},
  publisher = {Interscience Publishers},
  year      = {1962}
}
@article{rumelhart1986learning,
  author    = {Rumelhart, David E and Hinton, Geoffrey E and Williams, Ronald J},
  title     = {Learning representations by back-propagating errors},
  journal   = {Nature},
  volume    = {323},
  number    = {6088},
  pages     = {533--536},
  publisher = {Nature Publishing Group},
  year      = {1986}
}
@incollection{griewank1989automatic,
  author    = {Griewank, Andreas},
  title     = {On automatic differentiation},
  booktitle = {Mathematical Programming: Recent Developments and Applications},
  editor    = {Iri, Masao and Tanabe, Kunio},
  publisher = {Kluwer Academic Publishers},
  pages     = {83--108},
  year      = {1989}
}
@inproceedings{lecun1988theoretical,
  author    = {LeCun, Yann},
  title     = {A theoretical framework for back-propagation},
  booktitle = {Proceedings of the 1988 Connectionist Models Summer School},
  pages     = {21--28},
  publisher = {Morgan Kaufmann},
  year      = {1988}
}
@inproceedings{Gravdahl1997,
  author    = {Gravdahl, Jan Tommy and Egeland, Olav},
  title     = {Compressor surge control using a close-coupled valve and backstepping},
  booktitle = {Proceedings of the 1997 American Control Conference},
  volume    = {2},
  pages     = {982--986},
  publisher = {IEEE},
  year      = {1997},
  doi       = {10.1109/acc.1997.609673},
  url       = {http://dx.doi.org/10.1109/ACC.1997.609673}
}
@book{Grancharova2012,
  author    = {Grancharova, Alexandra Ivanova and Johansen, Tor Arne},
  title     = {Explicit Nonlinear Model Predictive Control},
  series    = {Lecture Notes in Control and Information Sciences},
  volume    = {429},
  publisher = {Springer},
  address   = {Berlin, Germany},
  year      = {2012},
  month     = mar,
  language  = {en},
  copyright = {https://www.springernature.com/gp/researchers/text-and-data-mining}
}
@book{fiacco1983introduction,
  author    = {Fiacco, Anthony V.},
  title     = {Introduction to Sensitivity and Stability Analysis in Nonlinear Programming},
  publisher = {Academic Press},
  year      = {1983}
}
@article{Sawaguchi2008,
  author    = {Sawaguchi, Y. and Furutani, E. and Shirakami, G. and Araki, M. and Fukuda, K.},
  title     = {A Model-Predictive Hypnosis Control System Under Total Intravenous Anesthesia},
  journal   = {IEEE Transactions on Biomedical Engineering},
  volume    = {55},
  number    = {3},
  pages     = {874--887},
  year      = {2008},
  month     = mar,
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  issn      = {0018-9294},
  doi       = {10.1109/tbme.2008.915670},
  url       = {http://dx.doi.org/10.1109/tbme.2008.915670}
}
@article{Adams2009,
  author    = {Adams, Christopher Paul and Brantner, Van Vu},
  title     = {Spending on new drug development},
  journal   = {Health Economics},
  volume    = {19},
  number    = {2},
  pages     = {130--141},
  year      = {2009},
  month     = feb,
  publisher = {Wiley},
  issn      = {1099-1050},
  doi       = {10.1002/hec.1454},
  url       = {http://dx.doi.org/10.1002/hec.1454}
}
@book{Chang2010,
  author    = {Chang, Mark},
  title     = {Monte Carlo Simulation for the Pharmaceutical Industry: Concepts, Algorithms, and Case Studies},
  publisher = {CRC Press},
  year      = {2010},
  month     = sep,
  isbn      = {9780429152382},
  doi       = {10.1201/ebk1439835920},
  url       = {http://dx.doi.org/10.1201/EBK1439835920}
}
@book{Conroy2013,
  author    = {Conroy, Michael J. and Peterson, James T.},
  title     = {Decision Making in Natural Resource Management: A Structured, Adaptive Approach},
  publisher = {Wiley},
  year      = {2013},
  month     = jan,
  isbn      = {9781118506196},
  doi       = {10.1002/9781118506196},
  url       = {http://dx.doi.org/10.1002/9781118506196}
}
@book{Puterman1994,
  title     = {Markov Decision Processes: Discrete Stochastic Dynamic Programming},
  author    = {Puterman, Martin L.},
  publisher = {John Wiley \& Sons},
  address   = {New York},
  year      = {1994},
  isbn      = {978-0-471-61977-3},
  note      = {First published in 1994}
}
@article{rust1987optimal,
  author  = {Rust, John},
  title   = {Optimal Replacement of {GMC} Bus Engines: An Empirical Model of {Harold Zurcher}},
  journal = {Econometrica},
  volume  = {55},
  number  = {5},
  pages   = {999--1033},
  year    = {1987}
}
@inproceedings{ziebart2008maximum,
  author    = {Ziebart, Brian D. and Maas, Andrew L. and Bagnell, J. Andrew and Dey, Anind K.},
  title     = {Maximum Entropy Inverse Reinforcement Learning},
  booktitle = {Proceedings of the 23rd {AAAI} Conference on Artificial Intelligence},
  pages     = {1433--1438},
  year      = {2008}
}
@inproceedings{haarnoja2017reinforcement,
  author    = {Haarnoja, Tuomas and Tang, Haoran and Abbeel, Pieter and Levine, Sergey},
  title     = {Reinforcement Learning with Deep Energy-Based Policies},
  booktitle = {Proceedings of the 34th International Conference on Machine Learning},
  series    = {Proceedings of Machine Learning Research},
  volume    = {70},
  pages     = {1352--1361},
  publisher = {PMLR},
  year      = {2017}
}
% NOTE(review): the title, author list, and arXiv identifier in the entry below
% could not be matched to one another during review -- confirm all three against
% the arXiv listing before relying on this citation.
@misc{levine2018reinforcement,
  author        = {Levine, Sergey and Kumar, Aviral and Tucker, George and Fu, Justin},
  title         = {Reinforcement Learning as a Framework for Control: A Survey},
  eprint        = {1806.04222},
  archiveprefix = {arXiv},
  year          = {2018}
}
@inproceedings{geist2019,
  author    = {Geist, Matthieu and Scherrer, Bruno and Pietquin, Olivier},
  title     = {A Theory of Regularized {Markov} Decision Processes},
  booktitle = {Proceedings of the 36th International Conference on Machine Learning},
  editor    = {Chaudhuri, Kamalika and Salakhutdinov, Ruslan},
  series    = {Proceedings of Machine Learning Research},
  volume    = {97},
  pages     = {2160--2169},
  publisher = {PMLR},
  year      = {2019},
  month     = jun,
  pdf       = {http://proceedings.mlr.press/v97/geist19a/geist19a.pdf},
  url       = {https://proceedings.mlr.press/v97/geist19a.html},
  abstract  = {Many recent successful (deep) reinforcement learning algorithms make use of regularization, generally based on entropy or Kullback-Leibler divergence. We propose a general theory of regularized Markov Decision Processes that generalizes these approaches in two directions: we consider a larger class of regularizers, and we consider the general modified policy iteration approach, encompassing both policy iteration and value iteration. The core building blocks of this theory are a notion of regularized Bellman operator and the Legendre-Fenchel transform, a classical tool of convex optimization. This approach allows for error propagation analyses of general algorithmic schemes of which (possibly variants of) classical algorithms such as Trust Region Policy Optimization, Soft Q-learning, Stochastic Actor Critic or Dynamic Policy Programming are special cases. This also draws connections to proximal convex optimization, especially to Mirror Descent.}
}
@article{Bertsekas1983,
  author    = {Bertsekas, Dimitri P.},
  title     = {Distributed asynchronous computation of fixed points},
  journal   = {Mathematical Programming},
  volume    = {27},
  number    = {1},
  pages     = {107--120},
  year      = {1983},
  month     = sep,
  publisher = {Springer Science and Business Media LLC},
  issn      = {1436-4646},
  doi       = {10.1007/bf02591967},
  url       = {http://dx.doi.org/10.1007/BF02591967}
}
@inproceedings{haarnoja2018soft,
  author    = {Haarnoja, Tuomas and Zhou, Aurick and Abbeel, Pieter and Levine, Sergey},
  title     = {Soft Actor-Critic: Off-Policy Maximum Entropy Deep Reinforcement Learning with a Stochastic Actor},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning},
  series    = {Proceedings of Machine Learning Research},
  volume    = {80},
  pages     = {1861--1870},
  publisher = {PMLR},
  year      = {2018}
}
@misc{kortum1992value,
  title  = {Value Function Approximation in an Estimation Routine},
  author = {Kortum, Samuel},
  note   = {Manuscript, Boston University},
  year   = {1992}
}
@incollection{Rust1996,
  author    = {Rust, John},
  title     = {Numerical dynamic programming in economics},
  booktitle = {Handbook of Computational Economics},
  editor    = {Amman, Hans M. and Kendrick, David A. and Rust, John},
  volume    = {1},
  chapter   = {14},
  pages     = {619--729},
  publisher = {Elsevier},
  year      = {1996},
  issn      = {1574-0021},
  doi       = {10.1016/s1574-0021(96)01016-7},
  url       = {http://dx.doi.org/10.1016/S1574-0021(96)01016-7}
}
@article{ernst2005tree,
  title   = {Tree-Based Batch Mode Reinforcement Learning},
  author  = {Ernst, Damien and Geurts, Pierre and Wehenkel, Louis},
  journal = {Journal of Machine Learning Research},
  year    = {2005},
  volume  = {6},
  pages   = {503--556}
}
@inproceedings{riedmiller2005neural,
  author    = {Riedmiller, Martin},
  title     = {Neural Fitted {Q} Iteration -- First Experiences with a Data Efficient Neural Reinforcement Learning Method},
  booktitle = {Proceedings of the 16th European Conference on Machine Learning (ECML)},
  pages     = {317--328},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
  year      = {2005}
}
@article{Ormoneit2002,
  author    = {Ormoneit, Dirk and Sen, {\'S}aunak},
  title     = {Kernel-Based Reinforcement Learning},
  journal   = {Machine Learning},
  volume    = {49},
  number    = {2/3},
  pages     = {161--178},
  year      = {2002},
  publisher = {Springer Science and Business Media LLC},
  issn      = {0885-6125},
  doi       = {10.1023/a:1017928328829},
  url       = {http://dx.doi.org/10.1023/A:1017928328829}
}
@article{ErnstGW05,
  author  = {Ernst, Damien and Geurts, Pierre and Wehenkel, Louis},
  title   = {Tree-Based Batch Mode Reinforcement Learning},
  journal = {Journal of Machine Learning Research},
  volume  = {6},
  pages   = {503--556},
  year    = {2005},
  url     = {https://jmlr.org/papers/v6/ernst05a.html}
}
@article{Geurts2006,
  author    = {Geurts, Pierre and Ernst, Damien and Wehenkel, Louis},
  title     = {Extremely randomized trees},
  journal   = {Machine Learning},
  volume    = {63},
  number    = {1},
  pages     = {3--42},
  year      = {2006},
  month     = mar,
  publisher = {Springer Science and Business Media LLC},
  issn      = {1573-0565},
  doi       = {10.1007/s10994-006-6226-1},
  url       = {http://dx.doi.org/10.1007/s10994-006-6226-1}
}
@inproceedings{Riedmiller05,
  title     = {Neural Fitted {Q} Iteration - First Experiences with a Data Efficient Neural Reinforcement Learning Method},
  author    = {Riedmiller, Martin A.},
  booktitle = {Machine Learning: {ECML} 2005, 16th European Conference on Machine Learning, Porto, Portugal, October 3-7, 2005, Proceedings},
  editor    = {Gama, Jo{\~{a}}o and Camacho, Rui and Brazdil, Pavel and Jorge, Al{\'{\i}}pio and Torgo, Lu{\'{\i}}s},
  series    = {Lecture Notes in Computer Science},
  volume    = {3720},
  pages     = {317--328},
  publisher = {Springer},
  year      = {2005},
  doi       = {10.1007/11564096\_32},
  url       = {https://doi.org/10.1007/11564096\_32}
}
@inproceedings{Gordon1995,
  author    = {Gordon, Geoffrey J.},
  title     = {Stable function approximation in dynamic programming},
  booktitle = {Proceedings of the Twelfth International Conference on Machine Learning},
  series    = {ICML'95},
  pages     = {261--268},
  numpages  = {8},
  publisher = {Morgan Kaufmann Publishers Inc.},
  address   = {San Francisco, CA, USA},
  location  = {Tahoe City, California, USA},
  year      = {1995},
  isbn      = {1558603778}
}
@article{Hafner2011,
  author    = {Hafner, Roland and Riedmiller, Martin},
  title     = {Reinforcement learning in feedback control: Challenges and benchmarks from technical process control},
  journal   = {Machine Learning},
  volume    = {84},
  number    = {1--2},
  pages     = {137--169},
  year      = {2011},
  month     = feb,
  publisher = {Springer Science and Business Media LLC},
  issn      = {1573-0565},
  doi       = {10.1007/s10994-011-5235-x},
  url       = {http://dx.doi.org/10.1007/s10994-011-5235-x}
}
@inproceedings{mnih2013atari,
  author    = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Graves, Alex and Antonoglou, Ioannis and Wierstra, Daan and Riedmiller, Martin},
  title     = {Playing {Atari} with Deep Reinforcement Learning},
  booktitle = {NIPS Deep Learning Workshop},
  year      = {2013}
}
@misc{lillicrap2015continuous,
  author        = {Lillicrap, Timothy P. and Hunt, Jonathan J. and Pritzel, Alexander and Heess, Nicolas and Erez, Tom and Tassa, Yuval and Silver, David and Wierstra, Daan},
  title         = {Continuous Control with Deep Reinforcement Learning},
  eprint        = {1509.02971},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  year          = {2015}
}
@inproceedings{fujimoto2018addressing,
  author    = {Fujimoto, Scott and van Hoof, Herke and Meger, David},
  title     = {Addressing Function Approximation Error in Actor-Critic Methods},
  booktitle = {International Conference on Machine Learning (ICML)},
  pages     = {1587--1596},
  year      = {2018}
}
% A second definition of the citation key haarnoja2018soft used to sit here.
% BibTeX rejects repeated keys, so only the more complete entry with this key,
% defined earlier in this file, is kept.
@inproceedings{ash2020warm,
  author    = {Ash, Jordan T. and Adams, Ryan P.},
  title     = {On Warm-Starting Neural Network Training},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {33},
  pages     = {3884--3894},
  year      = {2020}
}
@inproceedings{Doro2023,
  title     = {Sample-Efficient Reinforcement Learning by Breaking the Replay Ratio Barrier},
  author    = {D'Oro, Pierluca and Schwarzer, Max and Nikishin, Evgenii and Bacon, Pierre-Luc and Bellemare, Marc G. and Courville, Aaron C.},
  booktitle = {The Eleventh International Conference on Learning Representations, {ICLR} 2023, Kigali, Rwanda, May 1-5, 2023},
  publisher = {OpenReview.net},
  year      = {2023}
}
@article{lin1992self,
  author  = {Lin, Long-Ji},
  title   = {Self-improving reactive agents based on reinforcement learning, planning and teaching},
  journal = {Machine Learning},
  volume  = {8},
  number  = {3--4},
  pages   = {293--321},
  year    = {1992}
}
@inproceedings{van2016deep,
  author    = {van Hasselt, Hado and Guez, Arthur and Silver, David},
  title     = {Deep Reinforcement Learning with Double {Q-learning}},
  booktitle = {Proceedings of the {AAAI} Conference on Artificial Intelligence},
  volume    = {30},
  number    = {1},
  publisher = {AAAI Press},
  year      = {2016}
}