% Workshop paper from DRBSD-8 2022 (held in conjunction with SC 2022); key is the exporter-generated identifier.
@inproceedings{d118fc4142c647aa9746da229464e0fc,
title = "Understanding Impact of Lossy Compression on Derivative-related Metrics in Scientific Datasets",
abstract = "Today's scientific simulations are producing extremely large amount of data everyday, which induces grand challenges in transferring and storing the data efficiently. Error-bounded lossy compression has been thought of as the most promising solution to the bigdata issue, however, it would cause data distortion that has to be controlled carefully for user's post-hoc analysis. Recently, the preservation of quantities of interest has become a priority. Derivative-related metrics are critical quantities of interest for many applications across domains. However, no prior research explored the impact of lossy compression on derivative-related metrics in particular. In this paper, we focus on understanding the impact of various error-controlled lossy compressors on multiple derivative-related metrics commonly concerned by users. We perform solid experiments that involve 5 state-of-the-art lossy compressors and 4 real-world application datasets. We summarize 5 valuable takeaways, which can shed some light in understanding the impact of lossy compression on derivative-related metrics.",
keywords = "Data Reduction, Derivative, HPC, Lossy Compression",
author = "Su, Zhaoyuan and Di, Sheng and Gok, {Ali Murat} and Cheng, Yue and Cappello, Franck",
note = "Funding Information: This research was supported by the ECP, Project Number: 17-SC-20-SC, a collaborative effort of two DOE organizations -- the Office of Science and the National Nuclear Security Administration, responsible for the planning and preparation of a capable exascale ecosystem, including software, applications, hardware, advanced system engineering and early testbed platforms, to support the nation{\textquoteright}s exascale computing imperative. The material was supported by the U.S. Department of Energy, Office of Science, Advanced Scientific Computing Research (ASCR), under contract DE-AC02-06CH11357, and supported by the National Science Foundation under Grant OAC-2003709, OAC-2104023, CCF-1919075, CCF-1919113, OAC-2106446, and CMMI-2134689. We acknowledge the computing resources provided on Bebop (operated by Laboratory Computing Resource Center at Argonne) and on Theta and JLSE (operated by Argonne Leadership Computing Facility). Publisher Copyright: {\textcopyright} 2022 IEEE.; 8th IEEE/ACM International Workshop on Data Analysis and Reduction for Big Scientific Data, DRBSD-8 2022 ; Conference date: 13-11-2022 Through 18-11-2022",
year = "2022",
doi = "10.1109/DRBSD56682.2022.00011",
language = "English (US)",
publisher = "Institute of Electrical and Electronics Engineers Inc.",
pages = "44--53",
booktitle = "Proceedings of {DRBSD-8} 2022: 8th International Workshop on Data Analysis and Reduction for Big Scientific Data, Held in conjunction with {SC} 2022: The International Conference for High Performance Computing, Networking, Storage and Analysis",
address = "United States",
}