@inproceedings{de5bc966caee4cb080614944b0ebfb91,
title = "Histogram sort with sampling",
abstract = "To minimize data movement, state-of-the-art parallel sorting algorithms use techniques based on sampling and histogramming to partition keys prior to redistribution. Sampling enables partitioning to be done using a representative subset of the keys, while histogramming enables evaluation and iterative improvement of a given partition. We introduce Histogram sort with sampling (HSS), which combines sampling and iterative histogramming to find high-quality partitions with minimal data movement and high practical performance. Compared to the best known (recently introduced) algorithm for finding these partitions, our algorithm requires a factor of Θ(log(p)/ log log(p)) less communication, and substantially less when compared to standard variants of Sample sort and Histogram sort. We provide a distributed-memory implementation of the proposed algorithm, compare its performance to two existing implementations, and provide a brief application study showing benefit of the new algorithm.",
keywords = "Data partitioning, Histogramming, Parallel sorting, Sampling",
author = "Vipul Harsh and Laxmikant Kale and Edgar Solomonik",
note = "Publisher Copyright: {\textcopyright} 2019 Copyright held by the owner/author(s). Publication rights licensed to the Association for Computing Machinery.; 31st ACM Symposium on Parallelism in Algorithms and Architectures, SPAA 2019 ; Conference date: 22-06-2019 Through 24-06-2019",
year = "2019",
month = jun,
day = "17",
doi = "10.1145/3323165.3323184",
language = "English (US)",
series = "Annual ACM Symposium on Parallelism in Algorithms and Architectures",
publisher = "Association for Computing Machinery",
pages = "201--212",
booktitle = "SPAA 2019 - Proceedings of the 31st ACM Symposium on Parallelism in Algorithms and Architectures",
address = "United States",
}