% Workshop paper (DSN-W 2012) evaluating checkpointing and fault tolerance protocols from an energy point of view.
@inproceedings{af942329b63b4edcae5a46b6d1eff8d1,
  author    = {Diouri, {M. El Mehdi} and Gl{\"u}ck, Olivier and Lefevre, Laurent and Cappello, Franck},
  title     = {Energy considerations in checkpointing and fault tolerance protocols},
  booktitle = {2012 IEEE/IFIP 42nd International Conference on Dependable Systems and Networks Workshops, DSN-W 2012},
  series    = {Proceedings of the International Conference on Dependable Systems and Networks},
  year      = {2012},
  isbn      = {9781467322645},
  doi       = {10.1109/DSNW.2012.6264670},
  language  = {English (US)},
  keywords  = {Checkpointing, Energy consumption, Evaluation, Fault tolerance protocols},
  abstract  = {Exascale supercomputers will gather hundreds millions cores. The first problem that we address is resiliency and fault tolerance to reach application termination on such platforms. The second problem is energy consumption since such systems will consume enormous amount of energy. In this paper, we evaluate checkpointing and existing fault tolerance protocols from an energy point of view. We measure on a real testbed the power consumption of the main atomic operations found in these protocols. The first results show that process coordination and RAM consume more power than checkpointing and HDD logging. However, the results we presented in Joules per Bytes for I/O operations, emphasize that checkpointing and HDD logging consume more energy than RAM logging. Finally, we propose to consider energy consumption as a criterion for the choice of fault tolerance protocols. In terms of energy consumption, we should promote message logging for applications exchanging small volumes of data and coordination for applications involving few processes.},
  note      = {Conference date: 25--28 June 2012},
}