@inproceedings{ed44e77bbc3046eda8211f2cf0b05ab3,
title = "Fast Compaction Algorithms for NoSQL Databases",
abstract = "Compaction plays a crucial role in NoSQL systems to ensure a high overall read throughput. In this work, we formally define compaction as an optimization problem that attempts to minimize disk I/O. We prove this problem to be NPHard. We then propose a set of algorithms and mathematically analyze upper bounds on worst-case cost. We evaluate the proposed algorithms on real-life workloads. Our results show that our algorithms incur low I/O costs and that a compaction approach using a balanced tree is most preferable.",
keywords = "compaction, greedy approximation algorithm, nosql, np-hard",
author = "Mainak Ghosh and Indranil Gupta and Shalmoli Gupta and Nirman Kumar",
note = "Publisher Copyright: {\textcopyright} 2015 IEEE.; 35th IEEE International Conference on Distributed Computing Systems, ICDCS 2015 ; Conference date: 29-06-2015 Through 02-07-2015",
year = "2015",
month = jul,
day = "22",
doi = "10.1109/ICDCS.2015.53",
language = "English (US)",
series = "Proceedings - International Conference on Distributed Computing Systems",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "452--461",
booktitle = "Proceedings - 2015 IEEE 35th International Conference on Distributed Computing Systems, ICDCS 2015",
address = "United States",
}