@inproceedings{57905157417346c78a4815e88fd57d14,
title = "Optimal Round and Sample-Size Complexity for Partitioning in Parallel Sorting",
abstract = "State-of-the-art parallel sorting algorithms for distributed-memory architectures are based on computing a balanced partitioning via sampling and histogramming. By finding samples that partition the sorted keys into evenly-sized chunks, these algorithms minimize the number of communication rounds required. Histogramming (computing positions of samples) guides sampling, enabling a decrease in the overall number of samples collected. We derive lower and upper bounds on the number of sampling/histogramming rounds required to compute a balanced partitioning. We improve on prior results to demonstrate that when using p processors, O(log∗p) rounds with O(p/log∗p) samples per round suffice. We match that with a lower bound that shows that any algorithm with O(p) samples per round requires at least ω(log∗p) rounds. Additionally, we prove the ω(p log p) samples lower bound for one round, thus proving that existing one round algorithms: sample sort, AMS sort [2] and HSS [16] have optimal sample size complexity. To derive the lower bound, we propose a hard randomized input distribution and apply classical results from the distribution theory of runs.",
keywords = "communication cost, communication lower bounds, parallel algorithms, parallel sorting",
author = "Wentao Yang and Vipul Harsh and Edgar Solomonik",
note = "Publisher Copyright: {\textcopyright} 2023 ACM.; 35th ACM Symposium on Parallelism in Algorithms and Architectures, SPAA 2023 ; Conference date: 17-06-2023 Through 19-06-2023",
year = "2023",
month = jun,
day = "17",
doi = "10.1145/3558481.3591076",
language = "English (US)",
series = "Annual ACM Symposium on Parallelism in Algorithms and Architectures",
publisher = "Association for Computing Machinery",
pages = "467--478",
booktitle = "SPAA 2023 - Proceedings of the 35th ACM Symposium on Parallelism in Algorithms and Architectures",
address = "United States",
}