@inproceedings{190281d1c5f74f36a5e40cf1b18bf29d,
  title     = {Decentralized Learning of Finite-Memory Policies in {Dec-POMDPs}},
  abstract  = {Multi-agent reinforcement learning (MARL) under partial observability is notoriously challenging as the agents only have asymmetric partial observations of the system. In this paper, we study MARL in decentralized partially observable Markov decision processes (Dec-POMDPs) with partial history sharing. In search of decentralized and tractable MARL solutions, we identify the appropriate conditions under which we can adopt the common information approach to naturally extend existing single-agent policy learners to Dec-POMDPs. In particular, under the conditions of bounded local memories and an efficient representation of the common information, we present a MARL algorithm that learns a near-optimal finite-memory policy in Dec-POMDPs. We establish the iteration complexity of the algorithm, which depends only linearly on the number of agents. Simulations on classic Dec-POMDP tasks show that our approach significantly outperforms existing decentralized solutions, and nearly matches the centralized ones that require stronger informational assumptions.},
  keywords  = {Multi-agent systems, decentralized control, decentralized optimization, machine learning, reinforcement learning},
  author    = {Mao, Weichao and Zhang, Kaiqing and Yang, Zhuoran and Ba{\c{s}}ar, Tamer},
  note      = {Publisher Copyright: Copyright {\textcopyright} 2023 The Authors. This is an open access article under the CC BY-NC-ND license (https://creativecommons.org/licenses/by-nc-nd/4.0/); 22nd IFAC World Congress ; Conference date: 09-07-2023 Through 14-07-2023},
  year      = {2023},
  month     = jul,
  day       = {1},
  doi       = {10.1016/j.ifacol.2023.10.1346},
  language  = {English (US)},
  series    = {IFAC-PapersOnLine},
  publisher = {Elsevier B.V.},
  volume    = {56},
  number    = {2},
  pages     = {2601--2607},
  editor    = {Ishii, Hideaki and Ebihara, Yoshio and Imura, Jun-ichi and Yamakita, Masaki},
  booktitle = {IFAC-PapersOnLine},
}