% Conference paper from DSN 2014 (see the note field for the event details).
% The former series field was removed: it repeated booktitle verbatim, so
% styles that print series for inproceedings entries emitted the proceedings
% name twice.
% NOTE(review): month/day (18 Sep) differ from the conference dates recorded
% in note (23-26 June 2014); presumably the proceedings publication date --
% confirm before relying on it.
@inproceedings{61903411f1d04c939201cfa72ff6e124,
  author    = {Ding, Rui and Fu, Qiang and Lou, {Jian Guang} and Lin, Qingwei and Zhang, Dongmei and Xie, Tao},
  title     = {Mining historical issue repositories to heal large-scale online service systems},
  booktitle = {Proceedings of the International Conference on Dependable Systems and Networks},
  pages     = {311--322},
  year      = {2014},
  month     = sep,
  day       = {18},
  publisher = {IEEE Computer Society},
  doi       = {10.1109/DSN.2014.39},
  keywords  = {Online service system, healing action, incident management, issue repository},
  abstract  = {Online service systems have been increasingly popular and important nowadays. Reducing the MTTR (Mean Time to Restore) of a service remains one of the most important steps to assure the user-perceived availability of the service. To reduce the MTTR, a common practice is to restore the service by identifying and applying an appropriate healing action. In this paper, we present an automated mining-based approach for suggesting an appropriate healing action for a given new issue. Our approach suggests an appropriate healing action by adapting healing actions from the retrieved similar historical issues. We have applied our approach to a real-world and large-scale product online service. The studies on 243 real issues of the service show that our approach can effectively suggest appropriate healing actions (with 87% accuracy) to reduce the MTTR of the service. In addition, according to issue characteristics, we further study and categorize issues where automatic healing suggestion faces difficulties.},
  note      = {Publisher Copyright: {\textcopyright} 2014 IEEE.; 44th Annual IEEE/IFIP International Conference on Dependable Systems and Networks, DSN 2014 ; Conference date: 23-06-2014 Through 26-06-2014},
  language  = {English (US)},
}