draft_bib.html

</dt-appendix>
</body>
<script type="text/bibliography">
@inproceedings{xue2016visual,
  author    = {Xue, Tianfan and Wu, Jiajun and Bouman, Katherine and Freeman, Bill},
  title     = {Visual dynamics: Probabilistic future frame synthesis via cross convolutional networks},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2016}
}

@article{lotter2016deep,
  author  = {Lotter, William and Kreiman, Gabriel and Cox, David},
  title   = {Deep predictive coding networks for video prediction and unsupervised learning},
  journal = {arXiv preprint arXiv:1605.08104},
  year    = {2016}
}

@article{villegas2017hierarchical,
  author  = {Villegas, Ruben and Yang, Jimei and Zou, Yuliang and Sohn, Sungryull and Lin, Xunyu and Lee, Honglak},
  title   = {Learning to generate long-term future via hierarchical prediction},
  journal = {arXiv preprint arXiv:1704.05831},
  year    = {2017}
}

@article{villegas2017decomposing,
  author  = {Villegas, Ruben and Yang, Jimei and Hong, Seunghoon and Lin, Xunyu and Lee, Honglak},
  title   = {Decomposing motion and content for natural video sequence prediction},
  journal = {arXiv preprint arXiv:1706.08033},
  year    = {2017}
}

% Venue name capitalised the same way as the other NIPS entries in this bibliography.
@inproceedings{finn2016unsupervised,
  author    = {Finn, Chelsea and Goodfellow, Ian and Levine, Sergey},
  title     = {Unsupervised learning for physical interaction through video prediction},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {64--72},
  year      = {2016}
}

% The page range used to sit after the closing brace, where parsers silently drop it; it now lives inside the entry.
@inproceedings{vondrick2016generating,
  author    = {Vondrick, Carl and Pirsiavash, Hamed and Torralba, Antonio},
  title     = {Generating videos with scene dynamics},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {613--621},
  year      = {2016}
}

@article{mathieu2015deep,
  author  = {Mathieu, Michael and Couprie, Camille and LeCun, Yann},
  title   = {Deep multi-scale video prediction beyond mean square error},
  journal = {arXiv preprint arXiv:1511.05440},
  year    = {2015}
}

% Surname particle "van den" belongs before the family name in the "von Last, First" form, matching the van2017vq entry.
@article{kalchbrenner2016vpn,
  author  = {Kalchbrenner, Nal and van den Oord, Aaron and Simonyan, Karen and Danihelka, Ivo and Vinyals, Oriol and Graves, Alex and Kavukcuoglu, Koray},
  title   = {Video pixel networks},
  journal = {arXiv preprint arXiv:1610.00527},
  year    = {2016}
}

@article{babaeizadeh2017sv2p,
  author  = {Babaeizadeh, Mohammad and Finn, Chelsea and Erhan, Dumitru and Campbell, Roy H and Levine, Sergey},
  title   = {Stochastic Variational Video Prediction},
  journal = {arXiv preprint arXiv:1710.11252},
  year    = {2017}
}

@article{denton2018stochastic,
  author  = {Denton, Emily and Fergus, Rob},
  title   = {Stochastic Video Generation with a Learned Prior},
  journal = {arXiv preprint arXiv:1802.07687},
  year    = {2018}
}

@article{nagabandi2017mbmf,
  author  = {Nagabandi, Anusha and Kahn, Gregory and Fearing, Ronald S and Levine, Sergey},
  title   = {Neural network dynamics for model-based deep reinforcement learning with model-free fine-tuning},
  journal = {arXiv preprint arXiv:1708.02596},
  year    = {2017}
}

@article{bansal2017mbmf,
  author  = {Bansal, Somil and Calandra, Roberto and Levine, Sergey and Tomlin, Claire},
  title   = {MBMF: Model-Based Priors for Model-Free Reinforcement Learning},
  journal = {arXiv preprint arXiv:1709.03153},
  year    = {2017}
}

% Venue name capitalised the same way as the other NIPS entries in this bibliography.
@inproceedings{watter2015e2c,
  author    = {Watter, Manuel and Springenberg, Jost and Boedecker, Joschka and Riedmiller, Martin},
  title     = {Embed to control: A locally linear latent dynamics model for control from raw images},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2746--2754},
  year      = {2015}
}

@article{banijamali2017rce,
  author  = {Banijamali, Ershad and Shu, Rui and Ghavamzadeh, Mohammad and Bui, Hung and Ghodsi, Ali},
  title   = {Robust locally-linear controllable embedding},
  journal = {arXiv preprint arXiv:1710.05373},
  year    = {2017}
}

@article{buesing2018dssm,
  author  = {Buesing, Lars and Weber, Theophane and Racaniere, Sebastien and Eslami, SM and Rezende, Danilo and Reichert, David P and Viola, Fabio and Besse, Frederic and Gregor, Karol and Hassabis, Demis and others},
  title   = {Learning and Querying Fast Generative Models for Reinforcement Learning},
  journal = {arXiv preprint arXiv:1802.03006},
  year    = {2018}
}

% Conference paper: typed as inproceedings with the conference in booktitle rather than in a journal field.
@inproceedings{ebert2017,
  author    = {Ebert, Frederik and Finn, Chelsea and Lee, Alex X. and Levine, Sergey},
  title     = {Self-supervised visual planning with temporal skip connections},
  booktitle = {Conference on Robot Learning},
  year      = {2017}
}


@article{banijamali2017disentangling,
  author  = {Banijamali, Ershad and Khajenezhad, Ahmad and Ghodsi, Ali and Ghavamzadeh, Mohammad},
  title   = {Disentangling Dynamics and Content for Control and Planning},
  journal = {arXiv preprint arXiv:1711.09165},
  year    = {2017}
}

@article{wahlstrom2015pixels,
  author    = {Wahlstr{\"o}m, Niklas and Sch{\"o}n, Thomas B and Deisenroth, Marc Peter},
  title     = {Learning deep dynamical models from image pixels},
  journal   = {IFAC-PapersOnLine},
  volume    = {48},
  number    = {28},
  pages     = {1059--1064},
  year      = {2015},
  publisher = {Elsevier}
}

% Authors in "Last, First" form like the rest of the file, so that "de Freitas" is kept together as the surname;
% the umlaut uses the same LaTeX escape as the other entries with umlauts.
@inproceedings{amos2018awareness,
  author    = {Amos, Brandon and Dinh, Laurent and Cabi, Serkan and Roth{\"o}rl, Thomas and Muldal, Alistair and Erez, Tom and Tassa, Yuval and de Freitas, Nando and Denil, Misha},
  title     = {Learning Awareness Models},
  booktitle = {International Conference on Learning Representations},
  year      = {2018}
}

@inproceedings{kalweit2017blending,
  author    = {Kalweit, Gabriel and Boedecker, Joschka},
  title     = {Uncertainty-driven Imagination for Continuous Deep Reinforcement Learning},
  booktitle = {Conference on Robot Learning},
  pages     = {195--206},
  year      = {2017}
}

@article{higuera2018synthesizing,
  author  = {Higuera, Juan Camilo Gamboa and Meger, David and Dudek, Gregory},
  title   = {Synthesizing Neural Network Controllers with Probabilistic Model based Reinforcement Learning},
  journal = {arXiv preprint arXiv:1803.02291},
  year    = {2018}
}

@inproceedings{deisenroth2011pilco,
  author    = {Deisenroth, Marc and Rasmussen, Carl E},
  title     = {PILCO: A model-based and data-efficient approach to policy search},
  booktitle = {Proceedings of the 28th International Conference on machine learning (ICML-11)},
  pages     = {465--472},
  year      = {2011}
}

@inproceedings{gal2016deeppilco,
  author    = {Gal, Yarin and McAllister, Rowan and Rasmussen, Carl Edward},
  title     = {Improving PILCO with Bayesian neural network dynamics models},
  booktitle = {Data-Efficient Machine Learning workshop, ICML},
  year      = {2016}
}

@article{rusu2016progressive,
  author  = {Rusu, Andrei A and Rabinowitz, Neil C and Desjardins, Guillaume and Soyer, Hubert and Kirkpatrick, James and Kavukcuoglu, Koray and Pascanu, Razvan and Hadsell, Raia},
  title   = {Progressive neural networks},
  journal = {arXiv preprint arXiv:1606.04671},
  year    = {2016}
}

@inproceedings{teh2017distral,
  author    = {Teh, Yee and Bapst, Victor and Czarnecki, Wojciech M and Quan, John and Kirkpatrick, James and Hadsell, Raia and Heess, Nicolas and Pascanu, Razvan},
  title     = {Distral: Robust multitask reinforcement learning},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {4499--4509},
  year      = {2017}
}

@article{sutton1991dyna,
  author    = {Sutton, Richard S},
  title     = {Dyna, an integrated architecture for learning, planning, and reacting},
  journal   = {ACM SIGART Bulletin},
  volume    = {2},
  number    = {4},
  pages     = {160--163},
  year      = {1991},
  publisher = {ACM}
}

% The project page address in the note is stored as plain text; wrapping it in a LaTeX url macro is the style's job, not the database's.
@incollection{ha2018worldmodels,
  author    = {Ha, David and Schmidhuber, J{\"u}rgen},
  title     = {Recurrent World Models Facilitate Policy Evolution},
  booktitle = {Advances in Neural Information Processing Systems 31},
  pages     = {2451--2463},
  year      = {2018},
  url       = {https://papers.nips.cc/paper/7512-recurrent-world-models-facilitate-policy-evolution},
  note      = {https://worldmodels.github.io}
}

@article{henaff2018planbybackprop,
  author  = {Henaff, Mikael and Whitney, William F and LeCun, Yann},
  title   = {Model-Based Planning with Discrete and Continuous Actions},
  journal = {arXiv preprint arXiv:1705.07177},
  year    = {2018}
}

@inproceedings{heess2015svg,
  author    = {Heess, Nicolas and Wayne, Gregory and Silver, David and Lillicrap, Tim and Erez, Tom and Tassa, Yuval},
  title     = {Learning continuous control policies by stochastic value gradients},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2944--2952},
  year      = {2015}
}

@inproceedings{finn2017foresight,
  author       = {Finn, Chelsea and Levine, Sergey},
  title        = {Deep visual foresight for planning robot motion},
  booktitle    = {Robotics and Automation (ICRA), 2017 IEEE International Conference on},
  pages        = {2786--2793},
  year         = {2017},
  organization = {IEEE}
}

@article{kingma2013vae,
  author  = {Kingma, Diederik P and Welling, Max},
  title   = {Auto-encoding variational bayes},
  journal = {arXiv preprint arXiv:1312.6114},
  year    = {2013}
}

@article{rezende2014vae,
  author  = {Rezende, Danilo Jimenez and Mohamed, Shakir and Wierstra, Daan},
  title   = {Stochastic backpropagation and approximate inference in deep generative models},
  journal = {arXiv preprint arXiv:1401.4082},
  year    = {2014}
}

@article{rao2009control,
  author    = {Rao, Anil V},
  title     = {A survey of numerical methods for optimal control},
  journal   = {Advances in the Astronautical Sciences},
  volume    = {135},
  number    = {1},
  pages     = {497--528},
  year      = {2009},
  publisher = {Univelt, Inc.}
}

@article{weber2017i2a,
  author  = {Weber, Th{\'e}ophane and Racani{\`e}re, S{\'e}bastien and Reichert, David P and Buesing, Lars and Guez, Arthur and Rezende, Danilo Jimenez and Badia, Adria Puigdom{\`e}nech and Vinyals, Oriol and Heess, Nicolas and Li, Yujia and others},
  title   = {Imagination-augmented agents for deep reinforcement learning},
  journal = {arXiv preprint arXiv:1707.06203},
  year    = {2017}
}

@inproceedings{oh2015atari,
  author    = {Oh, Junhyuk and Guo, Xiaoxiao and Lee, Honglak and Lewis, Richard L and Singh, Satinder},
  title     = {Action-conditional video prediction using deep networks in atari games},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2863--2871},
  year      = {2015}
}

@article{kurutach2018modeltrpo,
  author  = {Kurutach, Thanard and Clavera, Ignasi and Duan, Yan and Tamar, Aviv and Abbeel, Pieter},
  title   = {Model-ensemble trust-region policy optimization},
  journal = {arXiv preprint arXiv:1802.10592},
  year    = {2018}
}

@inproceedings{kalweit2017modelddpg,
  author    = {Kalweit, Gabriel and Boedecker, Joschka},
  title     = {Uncertainty-driven Imagination for Continuous Deep Reinforcement Learning},
  booktitle = {Conference on Robot Learning},
  pages     = {195--206},
  year      = {2017}
}

% The volume previously repeated the year; the ICML 2017 proceedings are volume 70 of the PMLR series.
@inproceedings{pathak2017mario,
  author    = {Pathak, Deepak and Agrawal, Pulkit and Efros, Alexei A and Darrell, Trevor},
  title     = {Curiosity-driven exploration by self-supervised prediction},
  booktitle = {International Conference on Machine Learning (ICML)},
  series    = {Proceedings of Machine Learning Research},
  volume    = {70},
  year      = {2017}
}

% Venue name capitalised the same way as the other NIPS entries in this bibliography.
@inproceedings{chung2015vrnn,
  author    = {Chung, Junyoung and Kastner, Kyle and Dinh, Laurent and Goel, Kratarth and Courville, Aaron C and Bengio, Yoshua},
  title     = {A recurrent latent variable model for sequential data},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {2980--2988},
  year      = {2015}
}

@inproceedings{van2017vq,
  author    = {van den Oord, Aaron and Vinyals, Oriol and others},
  title     = {Neural discrete representation learning},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {6309--6318},
  year      = {2017}
}

% Publisher name written without the stray space after the dot.
@article{hoffman2013svi,
  author    = {Hoffman, Matthew D and Blei, David M and Wang, Chong and Paisley, John},
  title     = {Stochastic variational inference},
  journal   = {The Journal of Machine Learning Research},
  volume    = {14},
  number    = {1},
  pages     = {1303--1347},
  year      = {2013},
  publisher = {JMLR.org}
}

@phdthesis{richards2005mpc,
  author = {Richards, Arthur George},
  title  = {Robust constrained model predictive control},
  school = {Massachusetts Institute of Technology},
  year   = {2005}
}

@article{rubinstein1997cem,
  author    = {Rubinstein, Reuven Y},
  title     = {Optimization of computer simulation models with rare events},
  journal   = {European Journal of Operational Research},
  volume    = {99},
  number    = {1},
  pages     = {89--112},
  year      = {1997},
  publisher = {Elsevier}
}
@inproceedings{hansen1996cma,
  author       = {Hansen, Nikolaus and Ostermeier, Andreas},
  title        = {Adapting arbitrary normal mutation distributions in evolution strategies: The covariance matrix adaptation},
  booktitle    = {Evolutionary Computation, 1996., Proceedings of IEEE International Conference on},
  pages        = {312--317},
  year         = {1996},
  organization = {IEEE}
}
% "de Las Casas" is the family name, so it goes before the comma instead of being split across given and family name.
@article{tassa2018dmcontrol,
  author  = {Tassa, Yuval and Doron, Yotam and Muldal, Alistair and Erez, Tom and Li, Yazhe and de Las Casas, Diego and Budden, David and Abdolmaleki, Abbas and Merel, Josh and Lefrancq, Andrew and others},
  title   = {DeepMind Control Suite},
  journal = {arXiv preprint arXiv:1801.00690},
  year    = {2018}
}
@article{mackay1992infogain,
  author    = {MacKay, David JC},
  title     = {Information-based objective functions for active data selection},
  journal   = {Neural computation},
  volume    = {4},
  number    = {4},
  pages     = {590--604},
  year      = {1992},
  publisher = {MIT Press}
}
@article{wayne2018merlin,
  author  = {Wayne, Greg and Hung, Chia-Chun and Amos, David and Mirza, Mehdi and Ahuja, Arun and Grabska-Barwinska, Agnieszka and Rae, Jack and Mirowski, Piotr and Leibo, Joel Z and Santoro, Adam and others},
  title   = {Unsupervised Predictive Memory in a Goal-Directed Agent},
  journal = {arXiv preprint arXiv:1803.10760},
  year    = {2018}
}
@article{henaff2017planbybackprop,
  author        = {Mikael Henaff and William F. Whitney and Yann LeCun},
  title         = {Model-Based Planning in Discrete Action Spaces},
  journal       = {CoRR},
  volume        = {abs/1705.07177},
  year          = {2017},
  archivePrefix = {arXiv},
  eprint        = {1705.07177},
  url           = {http://arxiv.org/abs/1705.07177},
  timestamp     = {Wed, 07 Jun 2017 14:42:08 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/HenaffWL17},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
@article{gemici2017temporalmemory,
  author  = {Gemici, Mevlana and Hung, Chia-Chun and Santoro, Adam and Wayne, Greg and Mohamed, Shakir and Rezende, Danilo J and Amos, David and Lillicrap, Timothy},
  title   = {Generative Temporal Models with Memory},
  journal = {arXiv preprint arXiv:1702.04649},
  year    = {2017}
}
@inproceedings{higgins2016beta,
  author    = {Higgins, Irina and Matthey, Loic and Pal, Arka and Burgess, Christopher and Glorot, Xavier and Botvinick, Matthew and Mohamed, Shakir and Lerchner, Alexander},
  title     = {beta-vae: Learning basic visual concepts with a constrained variational framework},
  booktitle = {International Conference on Learning Representations},
  year      = {2016}
}
@article{kingma2014adam,
  author  = {Kingma, Diederik P and Ba, Jimmy},
  title   = {Adam: A method for stochastic optimization},
  journal = {arXiv preprint arXiv:1412.6980},
  year    = {2014}
}
@article{wayne2018unsupervised,
  author  = {Wayne, Greg and Hung, Chia-Chun and Amos, David and Mirza, Mehdi and Ahuja, Arun and Grabska-Barwinska, Agnieszka and Rae, Jack and Mirowski, Piotr and Leibo, Joel Z and Santoro, Adam and others},
  title   = {Unsupervised Predictive Memory in a Goal-Directed Agent},
  journal = {arXiv preprint arXiv:1803.10760},
  year    = {2018}
}
@article{chiappa2017recurrent,
  author  = {Chiappa, Silvia and Racaniere, S{\'e}bastien and Wierstra, Daan and Mohamed, Shakir},
  title   = {Recurrent environment simulators},
  journal = {arXiv preprint arXiv:1704.02254},
  year    = {2017}
}
@inproceedings{mnih2016a3c,
  author    = {Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  title     = {Asynchronous methods for deep reinforcement learning},
  booktitle = {International Conference on Machine Learning},
  pages     = {1928--1937},
  year      = {2016}
}
@article{schulman2017ppo,
  author  = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  title   = {Proximal policy optimization algorithms},
  journal = {arXiv preprint arXiv:1707.06347},
  year    = {2017}
}
@article{hafner2017tfagents,
  author  = {Hafner, Danijar and Davidson, James and Vanhoucke, Vincent},
  title   = {TensorFlow Agents: Efficient Batched Reinforcement Learning in TensorFlow},
  journal = {arXiv preprint arXiv:1709.02878},
  year    = {2017}
}
@article{barth2018d4pg,
  author  = {Barth-Maron, Gabriel and Hoffman, Matthew W and Budden, David and Dabney, Will and Horgan, Dan and Muldal, Alistair and Heess, Nicolas and Lillicrap, Timothy},
  title   = {Distributed Distributional Deterministic Policy Gradients},
  journal = {arXiv preprint arXiv:1804.08617},
  year    = {2018}
}
@inproceedings{alemi18fixing,
  author    = {Alemi, Alexander A and Poole, Ben and Fischer, Ian and Dillon, Joshua V and Saurous, Rif A and Murphy, Kevin},
  title     = {Fixing a Broken ELBO},
  booktitle = {Proceedings of the 35th International Conference on Machine Learning (ICML-18)},
  year      = {2018}
}
@article{szegedy2013intriguing,
  author  = {Szegedy, Christian and Zaremba, Wojciech and Sutskever, Ilya and Bruna, Joan and Erhan, Dumitru and Goodfellow, Ian and Fergus, Rob},
  title   = {Intriguing properties of neural networks},
  journal = {arXiv preprint arXiv:1312.6199},
  year    = {2013}
}
% Cleaned-up export: given names spelled out (as in the chua2018pets entry for the same preprint), surnames no longer
% individually braced, and the title freed from its quote-plus-brace wrapper so a style can apply its own casing.
@article{chua2018deep,
  author        = {Chua, Kurtland and Calandra, Roberto and McAllister, Rowan and Levine, Sergey},
  title         = {Deep Reinforcement Learning in a Handful of Trials using Probabilistic Dynamics Models},
  journal       = {ArXiv e-prints},
  archivePrefix = {arXiv},
  eprint        = {1805.12114},
  primaryClass  = {cs.LG},
  keywords      = {Computer Science - Learning, Computer Science - Artificial Intelligence, Computer Science - Robotics, Statistics - Machine Learning},
  year          = {2018},
  month         = may
}
@article{karl2016dvbf,
  author  = {Karl, Maximilian and Soelch, Maximilian and Bayer, Justin and van der Smagt, Patrick},
  title   = {Deep variational bayes filters: Unsupervised learning of state space models from raw data},
  journal = {arXiv preprint arXiv:1605.06432},
  year    = {2016}
}
@article{krishnan2015deepkalman,
  author  = {Krishnan, Rahul G and Shalit, Uri and Sontag, David},
  title   = {Deep kalman filters},
  journal = {arXiv preprint arXiv:1511.05121},
  year    = {2015}
}
@article{gregor2018tdvae,
  author  = {Gregor, Karol and Besse, Frederic},
  title   = {Temporal Difference Variational Auto-Encoder},
  journal = {arXiv preprint arXiv:1806.03107},
  year    = {2018}
}
@article{chua2018pets,
  author  = {Chua, Kurtland and Calandra, Roberto and McAllister, Rowan and Levine, Sergey},
  title   = {Deep Reinforcement Learning in a Handful of Trials using Probabilistic Dynamics Models},
  journal = {arXiv preprint arXiv:1805.12114},
  year    = {2018}
}
@article{buckman2018steve,
  author  = {Buckman, Jacob and Hafner, Danijar and Tucker, George and Brevdo, Eugene and Lee, Honglak},
  title   = {Sample-Efficient Reinforcement Learning with Stochastic Ensemble Value Expansion},
  journal = {arXiv preprint arXiv:1807.01675},
  year    = {2018}
}
@article{doerr2018prssm,
  author  = {Doerr, Andreas and Daniel, Christian and Schiegg, Martin and Nguyen-Tuong, Duy and Schaal, Stefan and Toussaint, Marc and Trimpe, Sebastian},
  title   = {Probabilistic Recurrent State-Space Models},
  journal = {arXiv preprint arXiv:1801.10395},
  year    = {2018}
}
% Second author repaired: the export had fused the name with an "ALIAS" annotation, which is not a parseable name.
% Venue name capitalised the same way as the other NIPS entries in this bibliography.
@inproceedings{lamb2016professor,
  author    = {Lamb, Alex M and Goyal, Anirudh and Zhang, Ying and Zhang, Saizheng and Courville, Aaron C and Bengio, Yoshua},
  title     = {Professor forcing: A new algorithm for training recurrent networks},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {4601--4609},
  year      = {2016}
}
@article{srinivas2018upn,
  author  = {Srinivas, Aravind and Jabri, Allan and Abbeel, Pieter and Levine, Sergey and Finn, Chelsea},
  title   = {Universal Planning Networks},
  journal = {arXiv preprint arXiv:1804.00645},
  year    = {2018}
}
@inproceedings{nair2010relu,
  author    = {Nair, Vinod and Hinton, Geoffrey E},
  title     = {Rectified linear units improve restricted boltzmann machines},
  booktitle = {Proceedings of the 27th international conference on machine learning (ICML-10)},
  pages     = {807--814},
  year      = {2010}
}
@article{cho2014gru,
  author  = {Cho, Kyunghyun and Van Merri{\"e}nboer, Bart and Gulcehre, Caglar and Bahdanau, Dzmitry and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
  title   = {Learning phrase representations using RNN encoder-decoder for statistical machine translation},
  journal = {arXiv preprint arXiv:1406.1078},
  year    = {2014}
}
@inproceedings{bengio2015scheduled,
  author    = {Bengio, Samy and Vinyals, Oriol and Jaitly, Navdeep and Shazeer, Noam},
  title     = {Scheduled sampling for sequence prediction with recurrent neural networks},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1171--1179},
  year      = {2015}
}
% Title stored without a trailing period; the bibliography style supplies terminal punctuation.
@inproceedings{talvitie2014hallucinated,
  author    = {Talvitie, Erik},
  title     = {Model Regularization for Stable Sample Rollouts},
  booktitle = {UAI},
  pages     = {780--789},
  year      = {2014}
}
% Title stored without a trailing period; the bibliography style supplies terminal punctuation.
@inproceedings{venkatraman2015dad,
  author    = {Venkatraman, Arun and Hebert, Martial and Bagnell, J Andrew},
  title     = {Improving Multi-Step Prediction of Learned Time Series Models},
  booktitle = {AAAI},
  pages     = {3024--3030},
  year      = {2015}
}
@article{igl2018dvrl,
  author  = {Igl, Maximilian and Zintgraf, Luisa and Le, Tuan Anh and Wood, Frank and Whiteson, Shimon},
  title   = {Deep Variational Reinforcement Learning for POMDPs},
  journal = {arXiv preprint arXiv:1806.02426},
  year    = {2018}
}
% Page field completed to the article's full range instead of only its first page.
@article{silver2017alphago,
  author    = {Silver, David and Schrittwieser, Julian and Simonyan, Karen and Antonoglou, Ioannis and Huang, Aja and Guez, Arthur and Hubert, Thomas and Baker, Lucas and Lai, Matthew and Bolton, Adrian and others},
  title     = {Mastering the game of Go without human knowledge},
  journal   = {Nature},
  volume    = {550},
  number    = {7676},
  pages     = {354--359},
  year      = {2017},
  publisher = {Nature Publishing Group}
}
@inproceedings{tassa2012mpc,
  author       = {Tassa, Yuval and Erez, Tom and Todorov, Emanuel},
  title        = {Synthesis and stabilization of complex behaviors through online trajectory optimization},
  booktitle    = {Intelligent Robots and Systems (IROS), 2012 IEEE/RSJ International Conference on},
  pages        = {4906--4913},
  year         = {2012},
  organization = {IEEE}
}
@inproceedings{tassa2014mpc,
  author       = {Tassa, Yuval and Mansard, Nicolas and Todorov, Emo},
  title        = {Control-limited differential dynamic programming},
  booktitle    = {Robotics and Automation (ICRA), 2014 IEEE International Conference on},
  pages        = {1168--1175},
  year         = {2014},
  organization = {IEEE}
}
@article{moravvcik2017deepstack,
  author    = {Moravčík, Matej and Schmid, Martin and Burch, Neil and Lisý, Viliam and Morrill, Dustin and Bard, Nolan and Davis, Trevor and Waugh, Kevin and Johanson, Michael and Bowling, Michael},
  title     = {Deepstack: Expert-level artificial intelligence in heads-up no-limit poker},
  journal   = {Science},
  volume    = {356},
  number    = {6337},
  pages     = {508--513},
  year      = {2017},
  publisher = {American Association for the Advancement of Science}
}
% Page field completed to the article's full range instead of only its first page.
@article{mnih2015dqn,
  author    = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  year      = {2015},
  publisher = {Nature Publishing Group}
}
@article{moerland2017learning,
  author  = {Moerland, Thomas M and Broekens, Joost and Jonker, Catholijn M},
  title   = {Learning multimodal transition dynamics for model-based reinforcement learning},
  journal = {arXiv preprint arXiv:1705.00470},
  year    = {2017}
}
@article{ebert2017visualmpc,
  author  = {Ebert, Frederik and Finn, Chelsea and Lee, Alex X and Levine, Sergey},
  title   = {Self-supervised visual planning with temporal skip connections},
  journal = {arXiv preprint arXiv:1710.05268},
  year    = {2017}
}
@article{dillon2017tfd,
  author  = {Dillon, Joshua V and Langmore, Ian and Tran, Dustin and Brevdo, Eugene and Vasudevan, Srinivas and Moore, Dave and Patton, Brian and Alemi, Alex and Hoffman, Matt and Saurous, Rif A},
  title   = {TensorFlow Distributions},
  journal = {arXiv preprint arXiv:1711.10604},
  year    = {2017}
}
@inproceedings{agrawal2016poking,
  author    = {Agrawal, Pulkit and Nair, Ashvin V and Abbeel, Pieter and Malik, Jitendra and Levine, Sergey},
  title     = {Learning to poke by poking: Experiential learning of intuitive physics},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {5074--5082},
  year      = {2016}
}
% Title stored without a trailing period; the bibliography style supplies terminal punctuation.
@inproceedings{bellemare2016actiongap,
  author    = {Bellemare, Marc G and Ostrovski, Georg and Guez, Arthur and Thomas, Philip S and Munos, R{\'e}mi},
  title     = {Increasing the Action Gap: New Operators for Reinforcement Learning},
  booktitle = {AAAI},
  pages     = {1476--1483},
  year      = {2016}
}
@article{kingma2018glow,
  author  = {Kingma, Diederik P and Dhariwal, Prafulla},
  title   = {Glow: Generative flow with invertible 1x1 convolutions},
  journal = {arXiv preprint arXiv:1807.03039},
  year    = {2018}
}
@article{ebert2018foresight,
  author  = {Ebert, Frederik and Finn, Chelsea and Dasari, Sudeep and Xie, Annie and Lee, Alex and Levine, Sergey},
  title   = {Visual Foresight: Model-Based Deep Reinforcement Learning for Vision-Based Robotic Control},
  journal = {arXiv preprint arXiv:1812.00568},
  year    = {2018}
}
% Title stored without a trailing period; the bibliography style supplies terminal punctuation.
@inproceedings{krishnan2017ssmelbo,
  author    = {Krishnan, Rahul G and Shalit, Uri and Sontag, David},
  title     = {Structured Inference Networks for Nonlinear State Space Models},
  booktitle = {AAAI},
  pages     = {2101--2109},
  year      = {2017}
}
</script>
<script src="lib/blazy.js"></script>
<script>
  // Running count of images reported as loaded. Initialised before the Blazy
  // instance is created so the success callback can never run against an
  // undefined counter, whatever the library's callback timing is.
  var imageLoaded = 0;

  // Bump the counter and log the new total to the console.
  function updateCounter() {
    imageLoaded++;
    console.log("blazy image loaded: "+imageLoaded);
  }

  // Lazy-loader instance; `success` is presumably invoked by Blazy once per
  // successfully loaded image (confirm against the bundled lib/blazy.js).
  var bLazy = new Blazy({
    success: function(){
      updateCounter();
    }
  });
</script>