@comment{Conference paper from the 34th ACM International Conference on
  Supercomputing (ICS 2020). Author names use the unambiguous "Last, First"
  form; acronyms in title and booktitle are brace-protected against style
  recasing; address holds the publisher location, not the conference venue.}
@inproceedings{6f792e9dd1384b35989dbb37f0995f1b,
  author    = {Choi, Jaemin and Richards, David F. and Kale, Laxmikant V. and Bhatele, Abhinav},
  title     = {End-to-end performance modeling of distributed {GPU} applications},
  booktitle = {Proceedings of the 34th {ACM} International Conference on Supercomputing, {ICS} 2020},
  series    = {Proceedings of the International Conference on Supercomputing},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2020},
  month     = jun,
  day       = {29},
  doi       = {10.1145/3392717.3392737},
  keywords  = {GPU computing, communication, performance modeling, trace-driven simulation},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2020 ACM.; 34th ACM International Conference on Supercomputing, ICS 2020 ; Conference date: 29-06-2020 Through 02-07-2020},
  abstract  = {With the growing number of GPU-based supercomputing platforms and GPU-enabled applications, the ability to accurately model the performance of such applications is becoming increasingly important. Most current performance models for GPU-enabled applications are limited to single node performance. In this work, we propose a methodology for end-to-end performance modeling of distributed GPU applications. Our work strives to create performance models that are both accurate and easily applicable to any distributed GPU application. We combine trace-driven simulation of MPI communication using the TraceR-CODES framework with a profiling-based roofline model for GPU kernels. We make substantial modifications to these models to capture the complex effects of both on-node and off-node networks in today's multi-GPU supercomputers. We validate our model against empirical data from GPU platforms and also vary tunable parameters of our model to observe how they might affect application performance.},
}