% MiDas workshop paper (P-RECS 2020), DOI 10.1145/3391800.3398174.
% Notes for maintainers:
%   - address is the publisher's city, not the workshop venue.
%   - funding, copyright and conference are not standard fields, so standard
%     styles do not print them; they preserve metadata from the original
%     export that should not appear in the typeset reference list.
%   - Braces in title/booktitle protect words whose capitalisation must
%     survive sentence-casing styles.
@inproceedings{0378db8fcfd44b62b05a1a4c0fadaecb,
  author     = {Niddodi, Chaitra and Gehani, Ashish and Malik, Tanu and Navas, Jorge A. and Mohan, Sibin},
  title      = {{MiDas}: Containerizing Data-Intensive Applications with {I/O} Specialization},
  booktitle  = {{P-RECS} 2020 - Proceedings of the 3rd International Workshop on Practical Reproducible Evaluation of Computer Systems},
  pages      = {21--26},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  year       = {2020},
  month      = jun,
  day        = {23},
  doi        = {10.1145/3391800.3398174},
  keywords   = {I/O specialization, containers, data-intensive},
  language   = {English (US)},
  abstract   = {Scientific applications often depend on data produced from computational models. Model-generated data can be prohibitively large. Current mechanisms for sharing and distributing reproducible applications, such as containers, assume all model data is saved and included with a program to support its successful re-execution. However, including model data increases the sizes of containers. This increases the cost and time required for deployment and further reuse. We present a framework named MiDas (``Minimizing Datasets'') for specializing I/O libraries which, given an application, automates the process of identifying and including only a subset of the data accessed by the program. To do this, MiDas combines static and dynamic analysis techniques to map high level user inputs to low level file offsets. We show several orders of magnitude reduction in data size via specialization of I/O libraries associated with model-based data-intensive applications, such as those operating on meteorological and geophysical data.},
  funding    = {This material is based upon work supported by the National Science Foundation (NSF) under Grant ACI-1440800 and the Office of Naval Research (ONR) under Contract N68335-17-C-0558. Any opinions, findings and conclusions or recommendations expressed in this material are those of the authors and do not necessarily reflect the views of NSF or ONR.},
  copyright  = {{\textcopyright} 2020 ACM},
  conference = {3rd International Workshop on Practical Reproducible Evaluation of Computer Systems, P-RECS 2020; conference date: 2020-06-23},
}