% Conference paper from SLAML'11. The citation key is kept unchanged so that
% existing \cite commands continue to resolve.
@inproceedings{59cdba97649b43abb3c5d41c52887ff6,
  author    = {Gainaru, Ana and Cappello, Franck and Fullop, Joshi and Trausan-Matu, Stefan and Kramer, William},
  title     = {Adaptive event prediction strategy with dynamic time window for large-scale {HPC} systems},
  booktitle = {Managing Large-Scale Systems via the Analysis of System Logs and the Application of Machine Learning Techniques, {SLAML'11}},
  year      = {2011},
  doi       = {10.1145/2038633.2038637},
  isbn      = {9781450309783},
  language  = {English (US)},
  keywords  = {Event prediction, HPC systems, Logfile analysis},
  abstract  = {In this paper, we analyse messages generated by different HPC large-scale systems in order to extract sequences of correlated events which we lately use to predict the normal and faulty behaviour of the system. Our method uses a dynamic window strategy that is able to find frequent sequences of events regardless on the time delay between them. Most of the current related research narrows the correlation extraction to fixed and relatively small time windows that do not reflect the whole behaviour of the system. The generated events are in constant change during the lifetime of the machine. We consider that it is important to update the sequences at runtime by applying modifications after each prediction phase according to the forecast's accuracy and the difference between what was expected and what really happened. Our experiments show that our analysing system is able to predict around 60\% of events with a precision of around 85\% at a lower event granularity than before.},
  note      = {Conference date: 23--26 October 2011},
}