% Evans, Baik, Hood, Gropp: conference paper on soft faults in cluster networks (IM 2003).
% NOTE(review): `doi` looks like a volume-level DOI (no chapter suffix) and `address`
% holds a country rather than the publisher's city -- confirm both against the publisher record.
@inproceedings{69e0a9eb41db4a6ead4a2f219ad2dc3a,
  author    = {Evans, Jeffrey J. and Baik, Seongbok and Hood, Cynthia S. and Gropp, William},
  title     = {Toward understanding soft faults in high performance cluster networks},
  booktitle = {Integrated Network Management VIII},
  series    = {IFIP Advances in Information and Communication Technology},
  publisher = {Springer},
  address   = {Germany},
  year      = {2003},
  pages     = {117--120},
  doi       = {10.1007/978-0-387-35674-7},
  isbn      = {9781475755213},
  language  = {English (US)},
  keywords  = {Cluster, Fault management, Interconnection networks, Soft faults},
  abstract  = {Fault management in high performance cluster networks has been focused on the notion of hard faults (i.e., link or node failures). Network degradations that negatively impact performance but do not result in failures often go unnoticed. In this paper, we classify such degradations as soft faults. In addition, we identify consistent performance as an important requirement in cluster networks. Using this service requirement, we describe a comprehensive strategy for cluster fault management.},
  note      = {IFIP/IEEE 8th International Symposium on Integrated Network Management, IM 2003; Conference date: 24-03-2003 Through 28-03-2003},
}