@comment{learning.bib -- reinforcement-learning bibliography. (GitHub page-scrape residue removed.)}
@comment{----2023 ---}
@inproceedings{cardozo23prevelance,
  author        = {Cardozo, Nicol{\'a}s and Dusparic, Ivana and Cabrera, Christian},
  title         = {Prevalence of Code Smells in Reinforcement Learning Projects},
  booktitle     = {Conference on AI Engineering -- Software Engineering for AI},
  series        = {CAIN'23},
  pages         = {37--42},
  publisher     = {IEEE},
  month         = may,
  year          = {2023},
  eprint        = {2303.10236},
  archiveprefix = {arXiv},
  primaryclass  = {cs.SE}}
@inproceedings{tambon2023,
  author    = {Tambon, F. and Majdinasab, V. and Nikanjam, A. and Khomh, F. and Antoniol, G.},
  title     = {Mutation Testing of Deep Reinforcement Learning Based on Real Faults},
  booktitle = {2023 IEEE Conference on Software Testing, Verification and Validation (ICST)},
  pages     = {188--198},
  issn      = {2159-4848},
  doi       = {10.1109/ICST57152.2023.00026},
  publisher = {IEEE Computer Society},
  address   = {Los Alamitos, CA, USA},
  month     = apr,
  year      = {2023}}
@article{Biagiola2023,
  author    = {Biagiola, Matteo and Tonella, Paolo},
  title     = {Testing of Deep Reinforcement Learning Agents with Surrogate Models},
  journal   = {ACM Trans. Softw. Eng. Methodol.},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  issn      = {1049-331X},
  doi       = {10.1145/3631970},
  month     = nov,
  year      = {2023},
  note      = {Just Accepted},
  keywords  = {Software Testing, Reinforcement Learning},
  abstract  = {Deep Reinforcement Learning (DRL) has received a lot of attention from the research community in recent years. As the technology moves away from game playing to practical contexts, such as autonomous vehicles and robotics, it is crucial to evaluate the quality of DRL agents. In this paper, we propose a search-based approach to test such agents. Our approach, implemented in a tool called Indago, trains a classifier on failure and non-failure environment (i.e., pass) configurations resulting from the DRL training process. The classifier is used at testing time as a surrogate model for the DRL agent execution in the environment, predicting the extent to which a given environment configuration induces a failure of the DRL agent under test. The failure prediction acts as a fitness function, guiding the generation towards failure environment configurations, while saving computation time by deferring the execution of the DRL agent in the environment to those configurations that are more likely to expose failures. Experimental results show that our search-based approach finds 50\% more failures of the DRL agent than state-of-the-art techniques. Moreover, such failures are, on average, 78\% more diverse; similarly, the behaviors of the DRL agent induced by failure configurations are 74\% more diverse.}}
@article{Zolfagharian2023,
  author        = {A. Zolfagharian and M. Abdellatif and L. C. Briand and M. Bagherzadeh and R. S},
  internal-note = {author "R. S" looks truncated (likely "Ramesh S") -- verify against the published TSE paper},
  title         = {A Search-Based Testing Approach for Deep Reinforcement Learning Agents},
  journal       = {IEEE Transactions on Software Engineering},
  volume        = {49},
  number        = {7},
  pages         = {3715--3735},
  issn          = {1939-3520},
  doi           = {10.1109/TSE.2023.3269804},
  publisher     = {IEEE Computer Society},
  address       = {Los Alamitos, CA, USA},
  month         = jul,
  year          = {2023}}
@comment{----2022 ---}
@article{wan22,
  author        = {Wan, Yi and Sutton, Richard S.},
  title         = {Toward Discovering Options that Achieve Faster Planning},
  journal       = {arXiv preprint arXiv:2205.12515},
  eprint        = {2205.12515},
  archiveprefix = {arXiv},
  year          = {2022}}
@article{khetarpal22,
  author  = {Khetarpal, Khimya and Riemer, Matthew and Rish, Irina and Precup, Doina},
  title   = {Towards continual reinforcement learning: A review and perspectives},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {75},
  pages   = {1401--1476},
  year    = {2022}}
@inproceedings{filho2022,
  author    = {Filho, Roberto Rodrigues and Alberts, Elvin and Gerostathopoulos, Ilias and Porter, Barry and Costa, F{\'a}bio M.},
  title     = {Emergent Web Server: An Exemplar to Explore Online Learning in Compositional Self-Adaptive Systems},
  booktitle = {2022 International Symposium on Software Engineering for Adaptive and Self-Managing Systems},
  series    = {SEAMS'22},
  pages     = {36--42},
  doi       = {10.1145/3524844.3528079},
  year      = {2022}}
@comment{----2021 ---}
@article{cruz-benito21,
  author  = {Cruz-Benito, Juan and Vishwakarma, Sanjay and Martin-Fernandez, Francisco and Faro, Ismael},
  title   = {Automated Source Code Generation and Auto-Completion Using Deep Learning: Comparing and Discussing Current Language Model-Related Approaches},
  journal = {AI},
  volume  = {2},
  number  = {1},
  pages   = {1--16},
  issn    = {2673-2688},
  doi     = {10.3390/ai2010001},
  year    = {2021}}
@inproceedings{cardozo21,
  author       = {Cardozo, Nicol{\'a}s and Dusparic, Ivana},
  title        = {Adaptation to Unknown Situations as the Holy Grail of Learning-Based Self-Adaptive Systems: Research Directions},
  booktitle    = {International Symposium on Software Engineering for Adaptive and Self-Managing Systems},
  series       = {SEAMS'21},
  pages        = {252--253},
  organization = {IEEE},
  year         = {2021}}
@comment{---2020 ---}
@article{Zhang2020,
  author        = {Zhang, Jie M. and Harman, Mark and Ma, Lei and Liu, Yang},
  title         = {Machine Learning Testing: Survey, Landscapes and Horizons},
  journal       = {IEEE Transactions on Software Engineering},
  volume        = {48},
  number        = {1},
  pages         = {1--36},
  doi           = {10.1109/TSE.2019.2962027},
  year          = {2022},
  internal-note = {key says 2020 but the TSE issue is 2022 (DOI minted 2019, early access); key kept so existing citations do not break}}
@article{minecraft2020,
  author        = {Stephanie Milani and Nicholay Topin and Brandon Houghton and William H. Guss and Sharada P. Mohanty and Keisuke Nakata and Oriol Vinyals and Noboru Sean Kuno},
  title         = {Retrospective Analysis of the 2019 MineRL Competition on Sample Efficient Reinforcement Learning},
  journal       = {CoRR},
  volume        = {abs/2003.05012},
  url           = {https://arxiv.org/abs/2003.05012},
  eprint        = {2003.05012},
  archiveprefix = {arXiv},
  year          = {2020}}
@inproceedings{sharma2020,
  author    = {Sharma, Prerna and Chaudhary, Vikas and Malhotra, Nakul and Gupta, Nikita and Mittal, Mohit},
  editor    = {Khanna, Ashish and Gupta, Deepak and Bhattacharyya, Siddhartha and Snasel, Vaclav and Platos, Jan and Hassanien, Aboul Ella},
  title     = {Dynamic Web with Automatic Code Generation Using Deep Learning},
  booktitle = {International Conference on Innovative Computing and Communications},
  publisher = {Springer Singapore},
  address   = {Singapore},
  pages     = {687--697},
  year      = {2020}}
@article{wang2020,
  author        = {Wang, W. and Zhang, Y. and Sui, Y. and Wan, Y. and Zhao, Z. and Wu, J. and Yu, P. and Xu, G.},
  title         = {Reinforcement-Learning-Guided Source Code Summarization via Hierarchical Attention},
  journal       = {IEEE Transactions on Software Engineering},
  pages         = {1-1},
  doi           = {10.1109/TSE.2020.2979701},
  year          = {2020},
  internal-note = {early-access record: pages 1-1 and no volume/number yet -- update once the issue is assigned}}
@misc{devanbu2020deep,
  author        = {Prem Devanbu and Matthew Dwyer and Sebastian Elbaum and Michael Lowry and Kevin Moran and Denys Poshyvanyk and Baishakhi Ray and Rishabh Singh and Xiangyu Zhang},
  title         = {Deep Learning \& Software Engineering: State of Research and Future Directions},
  eprint        = {2009.08525},
  archiveprefix = {arXiv},
  year          = {2020}}
@inproceedings{trujillo20,
  author    = {Trujillo, Miller and Linares-V{\'a}squez, Mario and Escobar-Vel{\'a}squez, Camilo and Dusparic, Ivana and Cardozo, Nicol{\'a}s},
  title     = {Does Neuron Coverage Matter for Deep Reinforcement Learning? A Preliminary Study},
  booktitle = {Proceedings of the IEEE/ACM 42nd International Conference on Software Engineering Workshops},
  pages     = {215--220},
  numpages  = {6},
  isbn      = {9781450379632},
  publisher = {ACM},
  address   = {New York, NY, USA},
  doi       = {10.1145/3387940.3391462},
  year      = {2020}}
@inproceedings{restuccia20,
  author    = {Restuccia, Francesco and Melodia, Tommaso},
  title     = {DeepWiERL: Bringing Deep Reinforcement Learning to the Internet of Self-Adaptive Things},
  booktitle = {Conference on Computer Communications},
  series    = {INFOCOM'20},
  publisher = {IEEE},
  pages     = {844--853},
  doi       = {10.1109/INFOCOM41043.2020.9155461},
  year      = {2020}}
@inproceedings{palm2020,
  author    = {Palm, Alexander and Metzger, Andreas and Pohl, Klaus},
  editor    = {Dustdar, Schahram and Yu, Eric and Salinesi, Camille and Rieu, Dominique and Pant, Vik},
  title     = {Online Reinforcement Learning for Self-adaptive Information Systems},
  booktitle = {Advanced Information Systems Engineering},
  publisher = {Springer},
  address   = {Cham},
  pages     = {169--184},
  isbn      = {978-3-030-49435-3},
  year      = {2020}}
@comment{---2019 ---}
@inproceedings{sun2019,
  author    = {Zeyu Sun and Qihao Zhu and Lili Mou and Yingfei Xiong and Ge Li and Lu Zhang},
  title     = {A Grammar-Based Structural {CNN} Decoder for Code Generation},
  booktitle = {{AAAI} Conference on Artificial Intelligence},
  series    = {AAAI'19},
  pages     = {7055--7062},
  publisher = {{AAAI} Press},
  location  = {Honolulu, Hawaii, USA},
  month     = jan,
  doi       = {10.1609/aaai.v33i01.33017055},
  year      = {2019}}
@article{wang19,
  author  = {Hongbing Wang and Mingzhu Gu and Qi Yu and Yong Tao and Jiajie Li and Huanhuan Fei and Jia Yan and Wei Zhao and Tianjing Hong},
  title   = {Adaptive and large-scale service composition based on deep reinforcement learning},
  journal = {Knowledge-Based Systems},
  volume  = {180},
  pages   = {75--90},
  issn    = {0950-7051},
  doi     = {10.1016/j.knosys.2019.05.020},
  url     = {https://www.sciencedirect.com/science/article/pii/S0950705119302266},
  year    = {2019}}
@inproceedings{tiwang2019,
  author    = {Tiwang, R. and Oladunni, T. and Xu, W.},
  title     = {A Deep Learning Model for Source Code Generation},
  booktitle = {2019 SoutheastCon},
  pages     = {1--7},
  doi       = {10.1109/SoutheastCon42311.2019.9020360},
  year      = {2019}}
@article{zhang2019,
  author  = {Zhang, Shangtong and Whiteson, Shimon},
  title   = {{DAC}: The Double Actor-Critic Architecture for Learning Options},
  journal = {Advances in Neural Information Processing Systems},
  volume  = {32},
  year    = {2019}}
@article{edwards19,
  author  = {Ashley D. Edwards and Charles L. {Isbell Jr.}},
  title   = {Perceptual Values from Observation},
  journal = {CoRR},
  volume  = {abs/1905.07861},
  year    = {2019}}
@inproceedings{gueriau19,
  author    = {Maxime Gu{\'e}riau and Nicol{\'a}s Cardozo and Ivana Dusparic},
  title     = {Constructivist Approach to State Space Adaptation in Reinforcement Learning},
  booktitle = {International Conference on Self-Adaptive and Self-Organizing Systems (SASO)},
  series    = {SASO'19},
  location  = {Umea, Sweden},
  publisher = {IEEE},
  month     = jun,
  year      = {2019}}
@inproceedings{dangelo19,
  author    = {D'Angelo, M. and Gerasimou, S. and Ghahremani, S. and Grohmann, J. and Nunes, I. and Pournaras, E. and Tomforde, S.},
  title     = {On Learning in Collective Self-Adaptive Systems: State of Practice and a 3D Framework},
  booktitle = {International Symposium on Software Engineering for Adaptive and Self-Managing Systems},
  series    = {SEAMS'19},
  publisher = {ACM},
  pages     = {13--24},
  doi       = {10.1109/SEAMS.2019.00012},
  issn      = {2157-2305},
  month     = may,
  year      = {2019}}
@inproceedings{cardozo2019deeptest,
  author        = {Nicol{\'a}s Cardozo and Ivana Dusparic and Mario Linares-V{\'a}squez},
  title         = {Perspectives in Testing Deep {RL}},
  booktitle     = {DeepTest'19},
  year          = {2019},
  internal-note = {was @techreport carrying a booktitle and no institution; converted to @inproceedings -- verify the full workshop name}}
@comment{---2018 ---}
@inproceedings{fulton18,
  author    = {Nathan Fulton and Andr{\'e} Platzer},
  title     = {Safe Reinforcement Learning via Formal Methods: Toward Safe Control Through Proof and Learning},
  booktitle = {AAAI'18},
  month     = feb,
  year      = {2018}}
@book{sutton18,
  author    = {Sutton, Richard S. and Barto, Andrew G.},
  title     = {Reinforcement Learning: An Introduction},
  edition   = {Second},
  publisher = {MIT Press},
  pages     = {550},
  isbn      = {9780262039246},
  year      = {2018}}
@comment{---2017 ---}
@inproceedings{katz17,
  author    = {Guy Katz and Clark W. Barrett and David L. Dill and Kyle Julian and Mykel J. Kochenderfer},
  title     = {Reluplex: An Efficient {SMT} Solver for Verifying Deep Neural Networks},
  booktitle = {International Conference on Computer Aided Verification},
  series    = {CAV'17},
  pages     = {97--117},
  month     = jul,
  year      = {2017}}
@inproceedings{cardozo17,
  author    = {Cardozo, Nicol\'{a}s and Dusparic, Ivana and Castro, Jorge H.},
  title     = {Peace {COrP}: Learning to solve conflicts between contexts},
  booktitle = {International Workshop on Context-Oriented Programming},
  series    = {COP'17},
  publisher = {ACM},
  address   = {New York, NY, USA},
  isbn      = {9781450349710},
  doi       = {10.1145/3117802.3117803},
  year      = {2017}}
@article{marinescu17,
  author    = {Marinescu, Andrei and Dusparic, Ivana and Clarke, Siobh{\'a}n},
  title     = {Prediction-Based Multi-Agent Reinforcement Learning in Inherently Non-Stationary Environments},
  journal   = {Transactions on Autonomous and Adaptive Systems},
  series    = {TAAS},
  volume    = {12},
  number    = {2},
  pages     = {9},
  publisher = {ACM},
  year      = {2017}}
@comment{---2016 ---}
@inproceedings{Caporuscio2016,
  author    = {Caporuscio, M. and D'Angelo, M. and Grassi, V. and Mirandola, R.},
  editor    = {Aiello, Marco and Johnsen, Einar Broch and Dustdar, Schahram and Georgievski, Ilche},
  title     = {Reinforcement Learning Techniques for Decentralized Self-adaptive Service Assembly},
  booktitle = {Service-Oriented and Cloud Computing},
  publisher = {Springer International Publishing},
  address   = {Cham},
  pages     = {53--68},
  isbn      = {978-3-319-44482-6},
  year      = {2016},
  abstract  = {This paper proposes a self-organizing fully decentralized solution for the service assembly problem, whose goal is to guarantee a good overall quality for the delivered services, ensuring at the same time fairness among the participating peers. The main features of our solution are: (i) the use of a gossip protocol to support decentralized information dissemination and decision making, and (ii) the use of a reinforcement learning approach to make each peer able to learn from its experience the service selection rule to be followed, thus overcoming the lack of global knowledge. Besides, we explicitly take into account load-dependent quality attributes, which lead to the definition of a service selection rule that drives the system away from overloading conditions that could adversely affect quality and fairness. Simulation experiments show that our solution self-adapts to occurring variations by quickly converging to viable assemblies maintaining the specified quality and fairness objectives.}}
@inproceedings{clark16,
  author    = {Clark, A. J. and DeVries, B. and Moore, J. M. and Cheng, B. H. C. and McKinley, P. K.},
  title     = {An evolutionary approach to discovering execution mode boundaries for adaptive controllers},
  booktitle = {Symposium Series on Computational Intelligence},
  series    = {SSCI'16},
  publisher = {IEEE},
  pages     = {1--8},
  doi       = {10.1109/SSCI.2016.7850178},
  month     = dec,
  year      = {2016}}
@article{durugkar16,
  author  = {Ishan P. Durugkar and Clemens Rosenbaum and Stefan Dernbach and Sridhar Mahadevan},
  title   = {Deep Reinforcement Learning With Macro-Actions},
  journal = {CoRR},
  volume  = {abs/1606.04615},
  url     = {http://arxiv.org/abs/1606.04615},
  year    = {2016}}
@comment{----2015 ---}
@article{mnih15,
  author  = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A. and Veness, Joel and Bellemare, Marc G. and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K. and Ostrovski, Georg and Petersen, Stig and Beattie, Charles and Sadik, Amir and Antonoglou, Ioannis and King, Helen and Kumaran, Dharshan and Wierstra, Daan and Legg, Shane and Hassabis, Demis},
  title   = {Human-level control through deep reinforcement learning},
  journal = {Nature},
  volume  = {518},
  number  = {7540},
  pages   = {529--533},
  issn    = {0028-0836},
  doi     = {10.1038/nature14236},
  month   = feb,
  year    = {2015}}
@inproceedings{sculley15,
  author    = {Sculley, D. and Holt, Gary and Golovin, Daniel and Davydov, Eugene and Phillips, Todd and Ebner, Dietmar and Chaudhary, Vinay and Young, Michael and Crespo, Jean-Francois and Dennison, Dan},
  title     = {Hidden Technical Debt in Machine Learning Systems},
  booktitle = {Proceedings of the 28th International Conference on Neural Information Processing Systems - Volume 2},
  series    = {NIPS'15},
  location  = {Montreal, Canada},
  pages     = {2503--2511},
  numpages  = {9},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  year      = {2015},
  abstract  = {Machine learning offers a fantastically powerful toolkit for building useful complex prediction systems quickly. This paper argues it is dangerous to think of these quick wins as coming for free. Using the software engineering framework of technical debt, we find it is common to incur massive ongoing maintenance costs in real-world ML systems. We explore several ML-specific risk factors to account for in system design. These include boundary erosion, entanglement, hidden feedback loops, undeclared consumers, data dependencies, configuration issues, changes in the external world, and a variety of system-level anti-patterns.}}
@comment{----2014 ---}
@inproceedings{sculley14,
  author    = {D. Sculley and G. Holt and D. Golovin and E. Davydov and T. Phillips and D. Ebner and V. Chaudhary and M. Young},
  title     = {Machine Learning: The High Interest Credit Card of Technical Debt},
  booktitle = {{Workshop on Software Engineering for Machine Learning}},
  series    = {SE4ML'14},
  year      = {2014}}
@comment{----2013 ---}
@inproceedings{Griffith2013,
  author    = {Griffith, Shane and Subramanian, Kaushik and Scholz, Jonathan and Isbell, Charles L and Thomaz, Andrea L},
  title     = {Policy Shaping: Integrating Human Feedback with Reinforcement Learning},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {C.J. Burges and L. Bottou and M. Welling and Z. Ghahramani and K.Q. Weinberger},
  volume    = {26},
  publisher = {Curran Associates, Inc.},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2013/file/e034fb6b66aacc1d48f445ddfb08da98-Paper.pdf},
  year      = {2013}}
@comment{----2012 ---}
@article{dusparic12taas,
  author    = {Dusparic, Ivana and Cahill, Vinny},
  title     = {Autonomic multi-policy optimization in pervasive systems: Overview and evaluation},
  journal   = {Transactions on Autonomous and Adaptive Systems},
  series    = {TAAS},
  volume    = {7},
  number    = {1},
  pages     = {11},
  publisher = {ACM},
  year      = {2012}}
@comment{----2007 ---}
@article{tesauro07,
  author  = {G. Tesauro},
  title   = {Reinforcement learning in autonomic computing: A manifesto and case studies},
  journal = {IEEE Internet Computing},
  volume  = {11},
  number  = {1},
  pages   = {22--30},
  year    = {2007}}
@comment{----2006 ---}
@inproceedings{yoshikawa06,
  author    = {Takeshi Yoshikawa and Masahito Kurihara},
  title     = {An Acquiring Method of Macro-Actions in Reinforcement Learning},
  booktitle = {Proceedings of the {IEEE} International Conference on Systems, Man and Cybernetics},
  location  = {Taipei, Taiwan},
  pages     = {4813--4817},
  doi       = {10.1109/ICSMC.2006.385067},
  month     = oct,
  year      = {2006}}
@comment{----2005 ---}
@inproceedings{girgin05,
  author    = {Girgin, S. and Polat, F.},
  title     = {Option Discovery in Reinforcement Learning using Frequent Common Subsequences of Actions},
  booktitle = {International Conference on Computational Intelligence for Modelling, Control and Automation and International Conference on Intelligent Agents, Web Technologies and Internet Commerce (CIMCA-IAWTIC'06)},
  volume    = {1},
  pages     = {371--376},
  year      = {2005}}
@comment{----2004 ---}
@inproceedings{elfwing04,
  author    = {S. Elfwing and E. Uchibe and K. Doya and H. I. Christensen},
  title     = {Multi-agent reinforcement learning: using macro actions to learn a mating task},
  booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems},
  series    = {IROS'04},
  volume    = {4},
  pages     = {3164--3169},
  doi       = {10.1109/IROS.2004.1389904},
  month     = sep,
  year      = {2004}}
@comment{----2002 ---}
@inproceedings{pickett02,
  author    = {Marc Pickett and Andrew G. Barto},
  title     = {{PolicyBlocks}: An Algorithm for Creating Useful Macro-Actions in Reinforcement Learning},
  booktitle = {Int. Conference on Machine Learning},
  pages     = {506--513},
  publisher = {Morgan Kaufmann},
  year      = {2002}}
@inproceedings{stolle02,
  author    = {Stolle, Martin and Precup, Doina},
  title     = {Learning Options in Reinforcement Learning},
  booktitle = {{Proceedings of the International Symposium on Abstraction, Reformulation and Approximation}},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  pages     = {212--223},
  isbn      = {3540439412},
  year      = {2002}}
@comment{----1999 ---}
@article{sutton99,
  author  = {Richard S. Sutton and Doina Precup and Satinder Singh},
  title   = {{Between MDPs and semi-MDPs: A framework for temporal abstraction in reinforcement learning}},
  journal = {Artificial Intelligence},
  volume  = {112},
  pages   = {181--211},
  year    = {1999}}
@comment{----1998 ---}
@inproceedings{sutton98intra,
  author    = {Sutton, Richard S. and Precup, Doina and Singh, Satinder P.},
  title     = {Intra-Option Learning about Temporally Abstract Actions},
  booktitle = {International Conference on Machine Learning},
  series    = {ICML'98},
  volume    = {98},
  pages     = {556--564},
  year      = {1998}}
@book{sutton98,
  author    = {Sutton, Richard S. and Barto, Andrew G.},
  title     = {Reinforcement Learning: An Introduction},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  year      = {1998}}
@techreport{mcgovern98,
  author      = {McGovern, Amy and Sutton, Richard S.},
  title       = {Macro-Actions in Reinforcement Learning: An Empirical Analysis},
  institution = {University of Massachusetts},
  address     = {Amherst, MA, USA},
  year        = {1998}}
@inproceedings{mcgovern98nips,
  author    = {McGovern, Amy},
  title     = {{acQuire-macros}: An Algorithm for Automatically Learning Macro-actions},
  booktitle = {Workshop on Abstraction and Hierarchy in Reinforcement Learning},
  series    = {NIPS'98},
  year      = {1998}}
@inproceedings{randlov98,
  author    = {Jette Randl{\o}v},
  title     = {Learning Macro-actions in Reinforcement Learning},
  booktitle = {Proceedings of the International Conference on Neural Information Processing Systems},
  series    = {NIPS'98},
  location  = {Denver, CO},
  pages     = {1045--1051},
  numpages  = {7},
  url       = {http://dl.acm.org/citation.cfm?id=3009055.3009201},
  publisher = {MIT Press},
  address   = {Cambridge, MA, USA},
  year      = {1998}}
@comment{----1997 ---}
@inproceedings{mcgovern97,
  author    = {McGovern, Amy and Sutton, Richard S and Fagg, Andrew H},
  title     = {Roles of macro-actions in accelerating reinforcement learning},
  booktitle = {{Grace Hopper celebration of women in computing}},
  volume    = {1317},
  pages     = {15},
  year      = {1997}}
@comment{----1996 ---}
@inproceedings{humphrys96,
  author    = {Mark Humphrys},
  title     = {Action Selection methods using Reinforcement Learning},
  booktitle = {Proceedings of the International Conference on Simulation of Adaptive Behavior},
  pages     = {135--144},
  publisher = {MIT Press},
  year      = {1996}}
@comment{----1995 ---}
@techreport{humphrys95,
  author      = {Humphrys, Mark},
  title       = {{W-learning: competition among selfish Q-learners}},
  institution = {University of Cambridge, Computer Laboratory},
  number      = {UCAM-CL-TR-362},
  doi         = {10.48456/tr-362},
  month       = apr,
  year        = {1995}}
@comment{----1993 ---}
@incollection{fikes1993,
  author        = {Fikes, Richard E. and Hart, Peter E. and Nilsson, Nils J.},
  title         = {Learning and executing generalized robot plans},
  booktitle     = {Readings in Knowledge Acquisition and Learning},
  pages         = {485--503},
  publisher     = {Morgan Kaufmann},
  address       = {San Francisco, CA, USA},
  year          = {1993},
  internal-note = {was @inbook with no book title; booktitle inferred from the 1993 Morgan Kaufmann reprint -- verify}}
@comment{----1992 ---}
@article{watkins92,
  author  = {Watkins, Christopher J. C. H. and Dayan, Peter},
  title   = {Technical Note: {Q-Learning}},
  journal = {Machine Learning},
  volume  = {8},
  number  = {3},
  pages   = {279--292},
  month   = may,
  year    = {1992}}