-
Notifications
You must be signed in to change notification settings - Fork 5
/
draft_bib.html
662 lines (619 loc) · 25.1 KB
/
draft_bib.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
</dt-appendix>
</body>
<script type="text/bibliography">
@inproceedings{xue2016visual,
  author    = {Xue, Tianfan and Wu, Jiajun and Bouman, Katherine and Freeman, Bill},
  title     = {Visual dynamics: Probabilistic future frame synthesis via cross convolutional networks},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2016},
}
@article{lotter2016deep,
  author  = {Lotter, William and Kreiman, Gabriel and Cox, David},
  title   = {Deep predictive coding networks for video prediction and unsupervised learning},
  journal = {arXiv preprint arXiv:1605.08104},
  year    = {2016},
}
@article{villegas2017hierarchical,
  author  = {Villegas, Ruben and Yang, Jimei and Zou, Yuliang and Sohn, Sungryull and Lin, Xunyu and Lee, Honglak},
  title   = {Learning to generate long-term future via hierarchical prediction},
  journal = {arXiv preprint arXiv:1704.05831},
  year    = {2017},
}
@article{villegas2017decomposing,
  author  = {Villegas, Ruben and Yang, Jimei and Hong, Seunghoon and Lin, Xunyu and Lee, Honglak},
  title   = {Decomposing motion and content for natural video sequence prediction},
  journal = {arXiv preprint arXiv:1706.08033},
  year    = {2017},
}
@inproceedings{finn2016unsupervised,
  author    = {Finn, Chelsea and Goodfellow, Ian and Levine, Sergey},
  title     = {Unsupervised learning for physical interaction through video prediction},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {64--72},
  year      = {2016},
}
@inproceedings{vondrick2016generating,
  author    = {Vondrick, Carl and Pirsiavash, Hamed and Torralba, Antonio},
  title     = {Generating videos with scene dynamics},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {613--621},
  year      = {2016},
}
@article{mathieu2015deep,
  author  = {Mathieu, Michael and Couprie, Camille and LeCun, Yann},
  title   = {Deep multi-scale video prediction beyond mean square error},
  journal = {arXiv preprint arXiv:1511.05440},
  year    = {2015},
}
@article{kalchbrenner2016vpn,
  author  = {Kalchbrenner, Nal and van den Oord, Aaron and Simonyan, Karen and Danihelka, Ivo and Vinyals, Oriol and Graves, Alex and Kavukcuoglu, Koray},
  title   = {Video pixel networks},
  journal = {arXiv preprint arXiv:1610.00527},
  year    = {2016},
}
@article{babaeizadeh2017sv2p,
  author  = {Babaeizadeh, Mohammad and Finn, Chelsea and Erhan, Dumitru and Campbell, Roy H and Levine, Sergey},
  title   = {Stochastic Variational Video Prediction},
  journal = {arXiv preprint arXiv:1710.11252},
  year    = {2017},
}
@article{denton2018stochastic,
  author  = {Denton, Emily and Fergus, Rob},
  title   = {Stochastic Video Generation with a Learned Prior},
  journal = {arXiv preprint arXiv:1802.07687},
  year    = {2018},
}
@article{nagabandi2017mbmf,
  author  = {Nagabandi, Anusha and Kahn, Gregory and Fearing, Ronald S and Levine, Sergey},
  title   = {Neural network dynamics for model-based deep reinforcement learning with model-free fine-tuning},
  journal = {arXiv preprint arXiv:1708.02596},
  year    = {2017},
}
@article{bansal2017mbmf,
  author  = {Bansal, Somil and Calandra, Roberto and Levine, Sergey and Tomlin, Claire},
  title   = {MBMF: Model-Based Priors for Model-Free Reinforcement Learning},
  journal = {arXiv preprint arXiv:1709.03153},
  year    = {2017},
}
@inproceedings{watter2015e2c,
  author    = {Watter, Manuel and Springenberg, Jost and Boedecker, Joschka and Riedmiller, Martin},
  title     = {Embed to control: A locally linear latent dynamics model for control from raw images},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2746--2754},
  year      = {2015},
}
@article{banijamali2017rce,
  author  = {Banijamali, Ershad and Shu, Rui and Ghavamzadeh, Mohammad and Bui, Hung and Ghodsi, Ali},
  title   = {Robust locally-linear controllable embedding},
  journal = {arXiv preprint arXiv:1710.05373},
  year    = {2017},
}
@article{buesing2018dssm,
  author  = {Buesing, Lars and Weber, Th{\'e}ophane and Racani{\`e}re, S{\'e}bastien and Eslami, S. M. Ali and Rezende, Danilo and Reichert, David P and Viola, Fabio and Besse, Frederic and Gregor, Karol and Hassabis, Demis and others},
  title   = {Learning and Querying Fast Generative Models for Reinforcement Learning},
  journal = {arXiv preprint arXiv:1802.03006},
  year    = {2018},
}
@inproceedings{ebert2017,
  author    = {Ebert, Frederik and Finn, Chelsea and Lee, Alex X. and Levine, Sergey},
  title     = {Self-supervised visual planning with temporal skip connections},
  booktitle = {Conference on Robot Learning},
  year      = {2017},
}
@article{banijamali2017disentangling,
  author  = {Banijamali, Ershad and Khajenezhad, Ahmad and Ghodsi, Ali and Ghavamzadeh, Mohammad},
  title   = {Disentangling Dynamics and Content for Control and Planning},
  journal = {arXiv preprint arXiv:1711.09165},
  year    = {2017},
}
@article{wahlstrom2015pixels,
  author    = {Wahlstr{\"o}m, Niklas and Sch{\"o}n, Thomas B and Deisenroth, Marc Peter},
  title     = {Learning deep dynamical models from image pixels},
  journal   = {IFAC-PapersOnLine},
  volume    = {48},
  number    = {28},
  pages     = {1059--1064},
  year      = {2015},
  publisher = {Elsevier},
}
@inproceedings{amos2018awareness,
  author    = {Amos, Brandon and Dinh, Laurent and Cabi, Serkan and Rothörl, Thomas and Muldal, Alistair and Erez, Tom and Tassa, Yuval and de Freitas, Nando and Denil, Misha},
  title     = {Learning Awareness Models},
  booktitle = {International Conference on Learning Representations},
  year      = {2018},
}
@inproceedings{kalweit2017blending,
  author    = {Kalweit, Gabriel and Boedecker, Joschka},
  title     = {Uncertainty-driven Imagination for Continuous Deep Reinforcement Learning},
  booktitle = {Conference on Robot Learning},
  pages     = {195--206},
  year      = {2017},
}
@article{higuera2018synthesizing,
  author  = {Higuera, Juan Camilo Gamboa and Meger, David and Dudek, Gregory},
  title   = {Synthesizing Neural Network Controllers with Probabilistic Model based Reinforcement Learning},
  journal = {arXiv preprint arXiv:1803.02291},
  year    = {2018},
}
@inproceedings{deisenroth2011pilco,
  author    = {Deisenroth, Marc Peter and Rasmussen, Carl Edward},
  title     = {PILCO: A model-based and data-efficient approach to policy search},
  booktitle = {Proceedings of the 28th International Conference on Machine Learning (ICML-11)},
  pages     = {465--472},
  year      = {2011},
}
@inproceedings{gal2016deeppilco,
  author    = {Gal, Yarin and McAllister, Rowan and Rasmussen, Carl Edward},
  title     = {Improving PILCO with Bayesian neural network dynamics models},
  booktitle = {Data-Efficient Machine Learning workshop, ICML},
  year      = {2016},
}
@article{rusu2016progressive,
  author  = {Rusu, Andrei A and Rabinowitz, Neil C and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia},
  title   = {Progressive neural networks},
  journal = {arXiv preprint arXiv:1606.04671},
  year    = {2016},
}
@inproceedings{teh2017distral,
  author    = {Teh, Yee Whye and Bapst, Victor and Czarnecki, Wojciech M and Quan, John and Kirkpatrick, James and Hadsell, Raia and Heess, Nicolas and Pascanu, Razvan},
  title     = {Distral: Robust multitask reinforcement learning},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {4499--4509},
  year      = {2017},
}
@article{sutton1991dyna,
  author    = {Sutton, Richard S},
  title     = {Dyna, an integrated architecture for learning, planning, and reacting},
  journal   = {ACM SIGART Bulletin},
  volume    = {2},
  number    = {4},
  pages     = {160--163},
  year      = {1991},
  publisher = {ACM},
}
@incollection{ha2018worldmodels,
  author    = {Ha, David and Schmidhuber, J{\"u}rgen},
  title     = {Recurrent World Models Facilitate Policy Evolution},
  booktitle = {Advances in Neural Information Processing Systems 31},
  pages     = {2451--2463},
  year      = {2018},
  url       = {https://papers.nips.cc/paper/7512-recurrent-world-models-facilitate-policy-evolution},
  note      = "\url{https://worldmodels.github.io}",
}
@article{henaff2018planbybackprop,
  author  = {Henaff, Mikael and Whitney, William F and LeCun, Yann},
  title   = {Model-Based Planning with Discrete and Continuous Actions},
  journal = {arXiv preprint arXiv:1705.07177},
  year    = {2018},
}
@inproceedings{heess2015svg,
  author    = {Heess, Nicolas and Wayne, Gregory and Silver, David and Lillicrap, Tim and Erez, Tom and Tassa, Yuval},
  title     = {Learning continuous control policies by stochastic value gradients},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2944--2952},
  year      = {2015},
}
@inproceedings{finn2017foresight,
  author       = {Finn, Chelsea and Levine, Sergey},
  title        = {Deep visual foresight for planning robot motion},
  booktitle    = {2017 IEEE International Conference on Robotics and Automation (ICRA)},
  pages        = {2786--2793},
  year         = {2017},
  organization = {IEEE},
}
@article{kingma2013vae,
  author  = {Kingma, Diederik P and Welling, Max},
  title   = {Auto-encoding variational bayes},
  journal = {arXiv preprint arXiv:1312.6114},
  year    = {2013},
}
@article{rezende2014vae,
  author  = {Rezende, Danilo Jimenez and Mohamed, Shakir and Wierstra, Daan},
  title   = {Stochastic backpropagation and approximate inference in deep generative models},
  journal = {arXiv preprint arXiv:1401.4082},
  year    = {2014},
}
@article{rao2009control,
  author    = {Rao, Anil V},
  title     = {A survey of numerical methods for optimal control},
  journal   = {Advances in the Astronautical Sciences},
  volume    = {135},
  number    = {1},
  pages     = {497--528},
  year      = {2009},
  publisher = {Univelt, Inc.},
}
@article{weber2017i2a,
  author  = {Weber, Th{\'e}ophane and Racani{\`e}re, S{\'e}bastien and Reichert, David P and Buesing, Lars and Guez, Arthur and Rezende, Danilo Jimenez and Badia, Adria Puigdom{\`e}nech and Vinyals, Oriol and Heess, Nicolas and Li, Yujia and others},
  title   = {Imagination-augmented agents for deep reinforcement learning},
  journal = {arXiv preprint arXiv:1707.06203},
  year    = {2017},
}
@inproceedings{oh2015atari,
  author    = {Oh, Junhyuk and Guo, Xiaoxiao and Lee, Honglak and Lewis, Richard L and Singh, Satinder},
  title     = {Action-conditional video prediction using deep networks in atari games},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2863--2871},
  year      = {2015},
}
@article{kurutach2018modeltrpo,
  author  = {Kurutach, Thanard and Clavera, Ignasi and Duan, Yan and Tamar, Aviv and Abbeel, Pieter},
  title   = {Model-ensemble trust-region policy optimization},
  journal = {arXiv preprint arXiv:1802.10592},
  year    = {2018},
}
@inproceedings{kalweit2017modelddpg,
  author    = {Kalweit, Gabriel and Boedecker, Joschka},
  title     = {Uncertainty-driven Imagination for Continuous Deep Reinforcement Learning},
  booktitle = {Conference on Robot Learning},
  pages     = {195--206},
  year      = {2017},
}
@inproceedings{pathak2017mario,
  author    = {Pathak, Deepak and Agrawal, Pulkit and Efros, Alexei A and Darrell, Trevor},
  title     = {Curiosity-driven exploration by self-supervised prediction},
  booktitle = {International Conference on Machine Learning (ICML)},
  year      = {2017},
}
@inproceedings{chung2015vrnn,
  author    = {Chung, Junyoung and Kastner, Kyle and Dinh, Laurent and Goel, Kratarth and Courville, Aaron C and Bengio, Yoshua},
  title     = {A recurrent latent variable model for sequential data},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2980--2988},
  year      = {2015},
}
@inproceedings{van2017vq,
  author    = {van den Oord, Aaron and Vinyals, Oriol and Kavukcuoglu, Koray},
  title     = {Neural discrete representation learning},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {6309--6318},
  year      = {2017},
}
@article{hoffman2013svi,
  author    = {Hoffman, Matthew D and Blei, David M and Wang, Chong and Paisley, John},
  title     = {Stochastic variational inference},
  journal   = {The Journal of Machine Learning Research},
  volume    = {14},
  number    = {1},
  pages     = {1303--1347},
  year      = {2013},
  publisher = {JMLR.org},
}
@phdthesis{richards2005mpc,
  author = {Richards, Arthur George},
  title  = {Robust constrained model predictive control},
  school = {Massachusetts Institute of Technology},
  year   = {2005},
}
@article{rubinstein1997cem,
  author    = {Rubinstein, Reuven Y},
  title     = {Optimization of computer simulation models with rare events},
  journal   = {European Journal of Operational Research},
  volume    = {99},
  number    = {1},
  pages     = {89--112},
  year      = {1997},
  publisher = {Elsevier},
}
@inproceedings{hansen1996cma,
  author       = {Hansen, Nikolaus and Ostermeier, Andreas},
  title        = {Adapting arbitrary normal mutation distributions in evolution strategies: The covariance matrix adaptation},
  booktitle    = {Proceedings of IEEE International Conference on Evolutionary Computation},
  pages        = {312--317},
  year         = {1996},
  organization = {IEEE},
}
@article{tassa2018dmcontrol,
  author  = {Tassa, Yuval and Doron, Yotam and Muldal, Alistair and Erez, Tom and Li, Yazhe and de Las Casas, Diego and Budden, David and Abdolmaleki, Abbas and Merel, Josh and Lefrancq, Andrew and others},
  title   = {DeepMind Control Suite},
  journal = {arXiv preprint arXiv:1801.00690},
  year    = {2018},
}
@article{mackay1992infogain,
  author    = {MacKay, David J. C.},
  title     = {Information-based objective functions for active data selection},
  journal   = {Neural Computation},
  volume    = {4},
  number    = {4},
  pages     = {590--604},
  year      = {1992},
  publisher = {MIT Press},
}
@article{wayne2018merlin,
  author  = {Wayne, Greg and Hung, Chia-Chun and Amos, David and Mirza, Mehdi and Ahuja, Arun and Grabska-Barwinska, Agnieszka and Rae, Jack and Mirowski, Piotr and Leibo, Joel Z and Santoro, Adam and others},
  title   = {Unsupervised Predictive Memory in a Goal-Directed Agent},
  journal = {arXiv preprint arXiv:1803.10760},
  year    = {2018},
}
@article{henaff2017planbybackprop,
  title         = {Model-Based Planning in Discrete Action Spaces},
  author        = {Mikael Henaff and William F. Whitney and Yann LeCun},
  journal       = {CoRR},
  volume        = {abs/1705.07177},
  year          = {2017},
  url           = {http://arxiv.org/abs/1705.07177},
  archivePrefix = {arXiv},
  eprint        = {1705.07177},
  timestamp     = {Wed, 07 Jun 2017 14:42:08 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/HenaffWL17},
  bibsource     = {dblp computer science bibliography, https://dblp.org},
}
@article{gemici2017temporalmemory,
  author  = {Gemici, Mevlana and Hung, Chia-Chun and Santoro, Adam and Wayne, Greg and Mohamed, Shakir and Rezende, Danilo J and Amos, David and Lillicrap, Timothy},
  title   = {Generative Temporal Models with Memory},
  journal = {arXiv preprint arXiv:1702.04649},
  year    = {2017},
}
@inproceedings{higgins2016beta,
  author    = {Higgins, Irina and Matthey, Loic and Pal, Arka and Burgess, Christopher and Glorot, Xavier and Botvinick, Matthew and Mohamed, Shakir and Lerchner, Alexander},
  title     = {beta-vae: Learning basic visual concepts with a constrained variational framework},
  booktitle = {International Conference on Learning Representations},
  year      = {2016},
}
@article{kingma2014adam,
  author  = {Kingma, Diederik P and Ba, Jimmy},
  title   = {Adam: A method for stochastic optimization},
  journal = {arXiv preprint arXiv:1412.6980},
  year    = {2014},
}
@article{wayne2018unsupervised,
  author  = {Wayne, Greg and Hung, Chia-Chun and Amos, David and Mirza, Mehdi and Ahuja, Arun and Grabska-Barwinska, Agnieszka and Rae, Jack and Mirowski, Piotr and Leibo, Joel Z and Santoro, Adam and others},
  title   = {Unsupervised Predictive Memory in a Goal-Directed Agent},
  journal = {arXiv preprint arXiv:1803.10760},
  year    = {2018},
}
@article{chiappa2017recurrent,
  author  = {Chiappa, Silvia and Racaniere, S{\'e}bastien and Wierstra, Daan and Mohamed, Shakir},
  title   = {Recurrent environment simulators},
  journal = {arXiv preprint arXiv:1704.02254},
  year    = {2017},
}
@inproceedings{mnih2016a3c,
  author    = {Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  title     = {Asynchronous methods for deep reinforcement learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {1928--1937},
  year      = {2016},
}
@article{schulman2017ppo,
  author  = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  title   = {Proximal policy optimization algorithms},
  journal = {arXiv preprint arXiv:1707.06347},
  year    = {2017},
}
@article{hafner2017tfagents,
  author  = {Hafner, Danijar and Davidson, James and Vanhoucke, Vincent},
  title   = {TensorFlow Agents: Efficient Batched Reinforcement Learning in TensorFlow},
  journal = {arXiv preprint arXiv:1709.02878},
  year    = {2017},
}
@article{barth2018d4pg,
  author  = {Barth-Maron, Gabriel and Hoffman, Matthew W and Budden, David and Dabney, Will and Horgan, Dan and Muldal, Alistair and Heess, Nicolas and Lillicrap, Timothy},
  title   = {Distributed Distributional Deterministic Policy Gradients},
  journal = {arXiv preprint arXiv:1804.08617},
  year    = {2018},
}
@inproceedings{alemi18fixing,
  author    = {Alemi, Alexander A and Poole, Ben and Fischer, Ian and Dillon, Joshua V and Saurous, Rif A and Murphy, Kevin},
  title     = {Fixing a Broken ELBO},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning (ICML-18)},
  year      = {2018},
}
@article{szegedy2013intriguing,
  author  = {Szegedy, Christian and Zaremba, Wojciech and Sutskever, Ilya and Bruna, Joan and Erhan, Dumitru and Goodfellow, Ian and Fergus, Rob},
  title   = {Intriguing properties of neural networks},
  journal = {arXiv preprint arXiv:1312.6199},
  year    = {2013},
}
@article{chua2018deep,
  author        = {Chua, Kurtland and Calandra, Roberto and McAllister, Rowan and Levine, Sergey},
  title         = {Deep Reinforcement Learning in a Handful of Trials using Probabilistic Dynamics Models},
  journal       = {arXiv preprint arXiv:1805.12114},
  year          = {2018},
  month         = may,
  archivePrefix = {arXiv},
  eprint        = {1805.12114},
  primaryClass  = {cs.LG},
  keywords      = {Computer Science - Learning, Computer Science - Artificial Intelligence, Computer Science - Robotics, Statistics - Machine Learning},
}
@article{karl2016dvbf,
  author  = {Karl, Maximilian and Soelch, Maximilian and Bayer, Justin and van der Smagt, Patrick},
  title   = {Deep variational bayes filters: Unsupervised learning of state space models from raw data},
  journal = {arXiv preprint arXiv:1605.06432},
  year    = {2016},
}
@article{krishnan2015deepkalman,
  author  = {Krishnan, Rahul G and Shalit, Uri and Sontag, David},
  title   = {Deep kalman filters},
  journal = {arXiv preprint arXiv:1511.05121},
  year    = {2015},
}
@article{gregor2018tdvae,
  author  = {Gregor, Karol and Besse, Frederic},
  title   = {Temporal Difference Variational Auto-Encoder},
  journal = {arXiv preprint arXiv:1806.03107},
  year    = {2018},
}
@article{chua2018pets,
  author  = {Chua, Kurtland and Calandra, Roberto and McAllister, Rowan and Levine, Sergey},
  title   = {Deep Reinforcement Learning in a Handful of Trials using Probabilistic Dynamics Models},
  journal = {arXiv preprint arXiv:1805.12114},
  year    = {2018},
}
@article{buckman2018steve,
  author  = {Buckman, Jacob and Hafner, Danijar and Tucker, George and Brevdo, Eugene and Lee, Honglak},
  title   = {Sample-Efficient Reinforcement Learning with Stochastic Ensemble Value Expansion},
  journal = {arXiv preprint arXiv:1807.01675},
  year    = {2018},
}
@article{doerr2018prssm,
  author  = {Doerr, Andreas and Daniel, Christian and Schiegg, Martin and Nguyen-Tuong, Duy and Schaal, Stefan and Toussaint, Marc and Trimpe, Sebastian},
  title   = {Probabilistic Recurrent State-Space Models},
  journal = {arXiv preprint arXiv:1801.10395},
  year    = {2018},
}
@inproceedings{lamb2016professor,
  author    = {Lamb, Alex M and Goyal, Anirudh and Zhang, Ying and Zhang, Saizheng and Courville, Aaron C and Bengio, Yoshua},
  title     = {Professor forcing: A new algorithm for training recurrent networks},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {4601--4609},
  year      = {2016},
}
@article{srinivas2018upn,
  author  = {Srinivas, Aravind and Jabri, Allan and Abbeel, Pieter and Levine, Sergey and Finn, Chelsea},
  title   = {Universal Planning Networks},
  journal = {arXiv preprint arXiv:1804.00645},
  year    = {2018},
}
@inproceedings{nair2010relu,
  author    = {Nair, Vinod and Hinton, Geoffrey E},
  title     = {Rectified linear units improve restricted boltzmann machines},
  booktitle = {Proceedings of the 27th International Conference on Machine Learning (ICML-10)},
  pages     = {807--814},
  year      = {2010},
}
@article{cho2014gru,
  author  = {Cho, Kyunghyun and Van Merri{\"e}nboer, Bart and Gulcehre, Caglar and Bahdanau, Dzmitry and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
  title   = {Learning phrase representations using RNN encoder-decoder for statistical machine translation},
  journal = {arXiv preprint arXiv:1406.1078},
  year    = {2014},
}
@inproceedings{bengio2015scheduled,
  author    = {Bengio, Samy and Vinyals, Oriol and Jaitly, Navdeep and Shazeer, Noam},
  title     = {Scheduled sampling for sequence prediction with recurrent neural networks},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1171--1179},
  year      = {2015},
}
@inproceedings{talvitie2014hallucinated,
  author    = {Talvitie, Erik},
  title     = {Model Regularization for Stable Sample Rollouts},
  booktitle = {UAI},
  pages     = {780--789},
  year      = {2014},
}
@inproceedings{venkatraman2015dad,
  author    = {Venkatraman, Arun and Hebert, Martial and Bagnell, J Andrew},
  title     = {Improving Multi-Step Prediction of Learned Time Series Models},
  booktitle = {AAAI},
  pages     = {3024--3030},
  year      = {2015},
}
@article{igl2018dvrl,
  author  = {Igl, Maximilian and Zintgraf, Luisa and Le, Tuan Anh and Wood, Frank and Whiteson, Shimon},
  title   = {Deep Variational Reinforcement Learning for POMDPs},
  journal = {arXiv preprint arXiv:1806.02426},
  year    = {2018},
}
@article{silver2017alphago,
  author    = {Silver, David and Schrittwieser, Julian and Simonyan, Karen and Antonoglou, Ioannis and Huang, Aja and Guez, Arthur and Hubert, Thomas and Baker, Lucas and Lai, Matthew and Bolton, Adrian and others},
  title     = {Mastering the game of Go without human knowledge},
  journal   = {Nature},
  volume    = {550},
  number    = {7676},
  pages     = {354--359},
  year      = {2017},
  publisher = {Nature Publishing Group},
}
@inproceedings{tassa2012mpc,
  author       = {Tassa, Yuval and Erez, Tom and Todorov, Emanuel},
  title        = {Synthesis and stabilization of complex behaviors through online trajectory optimization},
  booktitle    = {2012 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
  pages        = {4906--4913},
  year         = {2012},
  organization = {IEEE},
}
@inproceedings{tassa2014mpc,
  author       = {Tassa, Yuval and Mansard, Nicolas and Todorov, Emanuel},
  title        = {Control-limited differential dynamic programming},
  booktitle    = {2014 IEEE International Conference on Robotics and Automation (ICRA)},
  pages        = {1168--1175},
  year         = {2014},
  organization = {IEEE},
}
@article{moravvcik2017deepstack,
  author    = {Moravčík, Matej and Schmid, Martin and Burch, Neil and Lisý, Viliam and Morrill, Dustin and Bard, Nolan and Davis, Trevor and Waugh, Kevin and Johanson, Michael and Bowling, Michael},
  title     = {Deepstack: Expert-level artificial intelligence in heads-up no-limit poker},
  journal   = {Science},
  volume    = {356},
  number    = {6337},
  pages     = {508--513},
  year      = {2017},
  publisher = {American Association for the Advancement of Science},
}
@article{mnih2015dqn,
  author    = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  year      = {2015},
  publisher = {Nature Publishing Group},
}
@article{moerland2017learning,
  author  = {Moerland, Thomas M and Broekens, Joost and Jonker, Catholijn M},
  title   = {Learning multimodal transition dynamics for model-based reinforcement learning},
  journal = {arXiv preprint arXiv:1705.00470},
  year    = {2017},
}
@article{ebert2017visualmpc,
  author  = {Ebert, Frederik and Finn, Chelsea and Lee, Alex X and Levine, Sergey},
  title   = {Self-supervised visual planning with temporal skip connections},
  journal = {arXiv preprint arXiv:1710.05268},
  year    = {2017},
}
@article{dillon2017tfd,
  author  = {Dillon, Joshua V and Langmore, Ian and Tran, Dustin and Brevdo, Eugene and Vasudevan, Srinivas and Moore, Dave and Patton, Brian and Alemi, Alex and Hoffman, Matt and Saurous, Rif A},
  title   = {TensorFlow Distributions},
  journal = {arXiv preprint arXiv:1711.10604},
  year    = {2017},
}
@inproceedings{agrawal2016poking,
  author    = {Agrawal, Pulkit and Nair, Ashvin V and Abbeel, Pieter and Malik, Jitendra and Levine, Sergey},
  title     = {Learning to poke by poking: Experiential learning of intuitive physics},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {5074--5082},
  year      = {2016},
}
@inproceedings{bellemare2016actiongap,
  author    = {Bellemare, Marc G and Ostrovski, Georg and Guez, Arthur and Thomas, Philip S and Munos, R{\'e}mi},
  title     = {Increasing the Action Gap: New Operators for Reinforcement Learning},
  booktitle = {AAAI},
  pages     = {1476--1483},
  year      = {2016},
}
@article{kingma2018glow,
  author  = {Kingma, Diederik P and Dhariwal, Prafulla},
  title   = {Glow: Generative flow with invertible 1x1 convolutions},
  journal = {arXiv preprint arXiv:1807.03039},
  year    = {2018},
}
@article{ebert2018foresight,
  author  = {Ebert, Frederik and Finn, Chelsea and Dasari, Sudeep and Xie, Annie and Lee, Alex and Levine, Sergey},
  title   = {Visual Foresight: Model-Based Deep Reinforcement Learning for Vision-Based Robotic Control},
  journal = {arXiv preprint arXiv:1812.00568},
  year    = {2018},
}
@inproceedings{krishnan2017ssmelbo,
  author    = {Krishnan, Rahul G and Shalit, Uri and Sontag, David},
  title     = {Structured Inference Networks for Nonlinear State Space Models},
  booktitle = {AAAI},
  pages     = {2101--2109},
  year      = {2017},
}
</script>
<script src="lib/blazy.js"></script>
<script>
// Bump the running total of loaded images and log the new count.
// Declared first for readability; function declarations are hoisted, so the
// Blazy callback below can reference it regardless of position.
function updateCounter() {
  imageLoaded += 1;
  console.log("blazy image loaded: "+imageLoaded);
}

// Blazy is handed a `success` callback that forwards to updateCounter().
var bLazy = new Blazy({
  success: function () {
    updateCounter();
  }
});

// Counter read and written by updateCounter(); set after Blazy is constructed,
// exactly as in the original statement order.
var imageLoaded = 0;
</script>