% Workshop paper from DRBSD-7 (held in conjunction with SC 2021).
% The funding, copyright and eventdate fields carry the text the publisher export had placed in "note".
@inproceedings{6c1f8fe6620c44138aed410514f63f7f,
  author    = {Krasowska, David and Bessac, Julie and Underwood, Robert and Calhoun, Jon C. and Di, Sheng and Cappello, Franck},
  title     = {Exploring Lossy Compressibility through Statistical Correlations of Scientific Datasets},
  booktitle = {Proceedings of {DRBSD-7} 2021: 7th International Workshop on Data Analysis and Reduction for Big Scientific Data, Held in conjunction with {SC} 2021: The International Conference for High Performance Computing, Networking, Storage and Analysis},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {47--53},
  year      = {2021},
  month     = nov,
  doi       = {10.1109/DRBSD754563.2021.00011},
  language  = {English (US)},
  keywords  = {Compression, High performance computing, Lossy compression, Statistical correlation analysis},
  abstract  = {Lossy compression plays a growing role in scientific simulations where the cost of storing their output data can span terabytes. Using error bounded lossy compression reduces the amount of storage for each simulation; however, there is no known bound for the upper limit on lossy compressibility. Correlation structures in the data, choice of compressor and error bound are factors allowing larger compression ratios and improved quality metrics. Analyzing these three factors provides one direction towards quantifying lossy compressibility. As a first step, we explore statistical methods to characterize the correlation structures present in the data and their relationships, through functional regression models, to compression ratios. We observed a relationship between compression ratios and several statistics summarizing the correlation structure of the data, which is a first step towards evaluating the theoretical limits of lossy compressibility used to eventually predict compression performance and adapt compressors to correlation structures present in the data.},
  funding   = {Clemson University is acknowledged for generous allotment of compute time on the Palmetto cluster. This material is based upon work supported by the National Science Foundation under Grant No. SHF-1910197, No. SHF-1617488. This material is based upon work supported in part by the Exascale Computing Project (17-SC-20-SC) of the U.S. Department of Energy (DOE), and by DOE{\textquoteright}s Advanced Scientific Research Office (ASCR) under contract DE-AC02-06CH11357.},
  copyright = {{\textcopyright} 2021 IEEE.},
  eventdate = {2021-11-14},
}