% Conference paper (inproceedings) describing the Julia package Checkpointing.jl,
% published in the ICCS 2023 proceedings.
% Names are stored in "Last, First" form; accented characters use LaTeX escapes
% wrapped as BibTeX special characters so they sort and render correctly.
% NOTE(review): "address" holds a country rather than the publisher's city, and no
% "volume" number is recorded for the series -- confirm both against the publisher's record.
@inproceedings{98d555af1b3a4b79a6b9c4912d536a1b,
  title     = {Transparent Checkpointing for Automatic Differentiation of Program Loops Through Expression Transformations},
  abstract  = {Automatic differentiation (AutoDiff) in machine learning is largely restricted to expressions used for neural networks (NN), with the depth rarely exceeding a few tens of layers. Compared to NN, numerical simulations typically involve iterative algorithms like time steppers that lead to millions of iterations. Even for modest-sized models, this may yield infeasible memory requirements when applying the adjoint method, also called backpropagation, to time-dependent problems. In this situation, checkpointing algorithms provide a trade-off between recomputation and storage. This paper presents the package Checkpointing.jl that leverages expression transformations in the programming language Julia and the package ChainRules.jl to automatically and transparently transform loop iterations into differentiated loops. The user may choose between various checkpointing algorithm schemes and storage devices. We describe the unique design of Checkpointing.jl and demonstrate its features on an automatically differentiated MPI implementation of Burgers' equation on the Polaris cluster at the Argonne Leadership Computing Facility.},
  keywords  = {Automatic differentiation, Checkpointing, Julia},
  author    = {Schanen, Michel and Narayanan, Sri Hari Krishna and Williamson, Sarah and Churavy, Valentin and Moses, William S. and Paehler, Ludger},
  note      = {We would like to thank Paul Hovland and Jan H{\"u}ckelheim for their valuable suggestions and discussions. This work was funded and/or supported by NSF Cyberinfrastructure for Sustained Scientific Innovation (CSSI) award numbers: 2104068, 2103942, and 2103804, Argonne Leadership Computing Facility, which is a U.S. Department of Energy (DOE) Office of Science User Facility supported under Contract DE-AC02-06CH11357, DOE Computational Sciences Graduate Fellowship, NSF (grants OAC-1835443, AGS-1835860, and AGS-1835881), DARPA under agreement number HR0011-20-9-0016 (PaPPa), Schmidt Futures program, Paul G. Allen Family Foundation, Charles Trimble, Audi Environmental Foundation, DOE, National Nuclear Security Administration under Award Number DE-NA0003965, LANL grant 531711, and German Research Council (DFG) under grant agreement No. 326472365. Research was sponsored in part by the US Air Force Research Laboratory and the United States Air Force Artificial Intelligence Accelerator and was accomplished under Cooperative Agreement Number FA8750-19-2-1000. The views and conclusions contained in this document are those of the authors and should not be interpreted as representing the official policies, either expressed or implied, of the United States Air Force or the U.S. Government. The U.S. Government is authorized to reproduce and distribute reprints for Government purposes notwithstanding any copyright notation herein. This material is based upon work supported by the DOE, Office of Science, Office of Advanced Scientific Computing Research.},
  year      = {2023},
  month     = jun,
  day       = {26},
  doi       = {10.1007/978-3-031-36024-4_37},
  language  = {English (US)},
  isbn      = {9783031360237},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer},
  pages     = {483--497},
  editor    = {Miky{\v s}ka, Ji{\v r}{\'i} and {de Mulatier}, Cl{\'e}lia and Paszynski, Maciej and Krzhizhanovskaya, Valeria V. and Dongarra, Jack J. and Sloot, Peter M. A.},
  booktitle = {Computational Science -- {ICCS} 2023 -- 23rd International Conference, Proceedings},
  address   = {Germany},
}