% Conference paper: Guan, Raginsky and Willett, American Control Conference 2014.
% - Author names use the unambiguous "Last, First" form.
% - "Markov" is brace-protected so sentence-casing styles keep its capital.
% - The conference dates (4--6 June 2014, recorded by the exporter as DD-MM-YYYY)
%   are expressed through the month macro and a short note.
% - NOTE(review): address holds a country rather than the publisher's city;
%   confirm the intended value (publisher city vs. conference venue) and update.
@inproceedings{c770829e80534a6d8938447204e6d368,
  author    = {Guan, Peng and Raginsky, Maxim and Willett, Rebecca},
  title     = {From minimax value to low-regret algorithms for online {Markov} decision processes},
  booktitle = {2014 American Control Conference, {ACC} 2014},
  series    = {Proceedings of the American Control Conference},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2014},
  month     = jun,
  pages     = {471--476},
  doi       = {10.1109/ACC.2014.6858844},
  isbn      = {9781479932726},
  language  = {English (US)},
  keywords  = {Machine learning, Markov processes},
  abstract  = {The standard Markov Decision Process (MDP) framework assumes a stationary (or at least predictable) environment. Online learning algorithms can deal with non-stationary or unpredictable environments, but there is no notion of a state that might be changing throughout the learning process as a function of past actions. In recent years, there has been a growing interest in combining the above two frameworks and considering an MDP setting, where the cost function is allowed to change arbitrarily after each time step. However, most of the work in this area has been algorithmic: given a problem, one would design an algorithm from scratch and analyze its performance on a case-by-case basis. Moreover, the presence of the state and the assumption of an arbitrarily varying environment complicate both the theoretical analysis and the development of computationally efficient methods. This paper builds on recent results of Rakhlin et al. to give a general framework for deriving algorithms in an MDP setting with arbitrarily changing costs. This framework leads to a unifying view of existing methods and provides a general procedure for constructing new ones.},
  note      = {Conference date: 4--6 June 2014},
}