% Lim and DeJong, "Towards finite-sample convergence of direct reinforcement
% learning", paper at the 16th European Conference on Machine Learning
% (ECML 2005), published in the Lecture Notes in Computer Science series.
%
% Field conventions used in this entry:
%   - booktitle names the proceedings; series names only the book series.
%   - eventtitle/eventdate hold the conference metadata (biblatex fields that
%     classic BibTeX styles silently ignore), so nothing database-specific is
%     printed through the note field.
%   - copyright is a non-standard, non-printing field that preserves the
%     notice shipped with the exported record.
%   - eventdate assumes the export wrote dates as DD-MM-YYYY, i.e. 3 to 7
%     October 2005 -- TODO confirm against the proceedings front matter.
%   - NOTE(review): the LNCS volume number and the publisher are not present
%     in the exported record; add volume/publisher after checking the DOI
%     landing page.
@inproceedings{e24883c505f14e52b575f883ee00053d,
  author     = {Lim, Shiau Hong and DeJong, Gerald},
  title      = {Towards finite-sample convergence of direct reinforcement learning},
  booktitle  = {Proceedings of the 16th {European} Conference on Machine Learning ({ECML} 2005)},
  series     = {Lecture Notes in Computer Science},
  pages      = {230--241},
  year       = {2005},
  doi        = {10.1007/11564096_25},
  isbn       = {3540292438},
  language   = {american},
  eventtitle = {16th {European} Conference on Machine Learning, {ECML} 2005},
  eventdate  = {2005-10-03/2005-10-07},
  copyright  = {Copyright 2011 Elsevier B.V., All rights reserved.},
  abstract   = {While direct, model-free reinforcement learning often performs better than model-based approaches in practice, only the latter have yet supported theoretical guarantees for finite-sample convergence. A major difficulty in analyzing the direct approach in an online setting is the absence of a definitive exploration strategy. We extend the notion of admissibility to direct reinforcement learning and show that standard Q-learning with optimistic initial values and constant learning rate is admissible. The notion justifies the use of a greedy strategy that we believe performs very well in practice and holds theoretical significance in deriving finite-sample convergence for direct reinforcement learning. We present empirical evidence that supports our idea.},
}