@comment{Conference paper (IPDPS 2011): GPU-cluster implementation of the MILC improved staggered quark lattice QCD code.}
@inproceedings{4af039e0b83a4e229f365aaee89c6eb0,
  author    = {Shi, Guochun and Gottlieb, Steven and Torok, Aaron and Kindratenko, Volodymyr},
  title     = {Design of {MILC} lattice {QCD} application for {GPU} clusters},
  booktitle = {Proceedings - 25th {IEEE} International Parallel and Distributed Processing Symposium, {IPDPS} 2011},
  year      = {2011},
  pages     = {363--371},
  doi       = {10.1109/IPDPS.2011.43},
  isbn      = {9780769543857},
  keywords  = {GPU, MILC, Quantum chromodynamics, conjugate gradient},
  language  = {English (US)},
  note      = {Conference held 16--20 May 2011},
  abstract  = {We present an implementation of the improved staggered quark action lattice QCD computation designed for execution on a GPU cluster. The parallelization strategy is based on dividing the space-time lattice along the time dimension and distributing the sub-lattices among the GPU cluster nodes. We provide a mixed-precision floating-point GPU implementation of the multi-mass conjugate gradient solver. Our single GPU implementation of the conjugate gradient solver achieves a 9x performance improvement over the highly optimized code executed on a state-of-the-art eight-core CPU node. The overall application executes almost six times faster on a GPU-enabled cluster vs. a conventional multi-core cluster. The developed code is currently used for running production QCD calculations with electromagnetic corrections.},
}