% Conference paper (PLDI 2018) introducing Gluon. The citation key is an opaque
% export hash and is kept unchanged so existing citations keep resolving.
% The funding acknowledgement lives in the non-standard "funding" field so that
% bibliography styles do not print it; event details use eventtitle/eventdate.
@inproceedings{78698d53a5f147258180eef48b1c50c3,
  author     = {Dathathri, Roshan and Gill, Gurbinder and Hoang, Loc and Dang, Hoang Vu and Brooks, Alex and Dryden, Nikoli and Snir, Marc and Pingali, Keshav},
  title      = {{Gluon}: A Communication-Optimizing Substrate for Distributed Heterogeneous Graph Analytics},
  booktitle  = {{PLDI} 2018 - Proceedings of the 39th {ACM} {SIGPLAN} Conference on Programming Language Design and Implementation},
  editor     = {Foster, Jeffrey S. and Grossman, Dan},
  series     = {ACM SIGPLAN Notices},
  number     = {4},
  pages      = {752--768},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  year       = {2018},
  month      = jun,
  day        = {11},
  doi        = {10.1145/3192366.3192404},
  language   = {English (US)},
  keywords   = {Distributed-memory graph analytics, GPUs, big data, communication optimizations, heterogeneous architectures},
  abstract   = {This paper introduces a new approach to building distributed-memory graph analytics systems that exploits heterogeneity in processor types (CPU and GPU), partitioning policies, and programming models. The key to this approach is Gluon, a communication-optimizing substrate. Programmers write applications in a shared-memory programming system of their choice and interface these applications with Gluon using a lightweight API. Gluon enables these programs to run on heterogeneous clusters and optimizes communication in a novel way by exploiting structural and temporal invariants of graph partitioning policies. To demonstrate Gluon's ability to support different programming models, we interfaced Gluon with the Galois and Ligra shared-memory graph analytics systems to produce distributed-memory versions of these systems named D-Galois and D-Ligra, respectively. To demonstrate Gluon's ability to support heterogeneous processors, we interfaced Gluon with IrGL, a state-of-the-art single-GPU system for graph analytics, to produce D-IrGL, the first multi-GPU distributed-memory graph analytics system. Our experiments were done on CPU clusters with up to 256 hosts and roughly 70,000 threads and on multi-GPU clusters with up to 64 GPUs. The communication optimizations in Gluon improve end-to-end application execution time by $\sim$2.6$\times$ on the average. D-Galois and D-IrGL scale well and are faster than Gemini, the state-of-the-art distributed CPU graph analytics system, by factors of $\sim$3.9$\times$ and $\sim$4.9$\times$, respectively, on the average.},
  eventtitle = {39th {ACM} {SIGPLAN} Conference on Programming Language Design and Implementation, {PLDI} 2018},
  eventdate  = {2018-06-18/2018-06-22},
  funding    = {This research was supported by NSF grants 1337217, 1337281, 1406355, 1618425, 1725322 and by DARPA contracts FA8750-16-2-0004 and FA8650-15-C-7563. This work used XSEDE grant ACI-1548562 through allocation TG-CIE170005. We used the Bridges system, supported by NSF award number ACI-1445606, at the Pittsburgh Supercomputing Center, and the Stampede system at Texas Advanced Computing Center, University of Texas at Austin.},
}