% Conference paper from IPDPS 2017 on express-mesh networks and node allocation.
% The exporter's funding / copyright / conference-date text is preserved in
% `annote` (not printed by the standard styles) rather than `note`.
% No `series` field: the exported value merely repeated `booktitle`.
@inproceedings{17bf85fc789c4e9c97220984febb4bc6,
  author    = {Jain, Nikhil and Bhatele, Abhinav and Ni, Xiang and Gamblin, Todd and Kale, Laxmikant V.},
  title     = {Partitioning Low-Diameter Networks to Eliminate Inter-Job Interference},
  booktitle = {Proceedings - 2017 {IEEE} 31st International Parallel and Distributed Processing Symposium, {IPDPS} 2017},
  pages     = {439--448},
  year      = {2017},
  month     = jun,
  day       = {30},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  doi       = {10.1109/IPDPS.2017.91},
  language  = {English (US)},
  keywords  = {Network topology, express mesh, inter-job interference, partitionability, simulation},
  abstract  = {On most supercomputers, except some torus network based systems, resource managers allocate nodes to jobs without considering the sharing of network resources by different jobs. Such network-oblivious resource allocations result in link sharing among multiple jobs that can cause significant performance variability and performance degradation for individual jobs. In this paper, we explore low-diameter networks and corresponding node allocation policies that can eliminate inter-job interference. We propose a variation to n-dimensional mesh networks called express mesh. An express mesh is denser than the corresponding mesh network, has a low diameter independent of the number of routers, and is easily partitionable. We compare structural properties and performance of express mesh with other popular low-diameter networks. We present practical node allocation policies for express mesh and fat-tree networks that not only eliminate inter-job interference and performance variability, but also improve overall performance.},
  annote    = {Funding Information: ACKNOWLEDGMENT This work was performed under the auspices of the U.S. Department of Energy by Lawrence Livermore National Laboratory under Contract DE-AC52-07NA27344 (LLNL-CONF-706801). Publisher Copyright: {\textcopyright} 2017 IEEE.; 31st IEEE International Parallel and Distributed Processing Symposium, IPDPS 2017 ; Conference date: 29-05-2017 Through 02-06-2017},
}