% Workshop paper: Maheshwari et al., "Evaluating storage systems for scientific
% data in the cloud", ScienceCloud 2014 (ACM), pages 33--40.
% The citation key is an opaque export identifier; it is kept unchanged so that
% existing citations of this entry still resolve.
% Non-standard fields (abstract, keywords, language) are ignored by the standard
% styles; the percent sign in the abstract is escaped for styles that print it.
@inproceedings{301a199342c943939674aad22022aca1,
  author    = {Maheshwari, Ketan and Wozniak, Justin M. and Yang, Hao and Katz, Daniel S. and Ripeanu, Matei and Zavala, Victor and Wilde, Michael},
  title     = {Evaluating storage systems for scientific data in the cloud},
  booktitle = {{ScienceCloud} 2014 - Proceedings of the 2014 {ACM} International Workshop on Scientific Cloud Computing, Co-located with {HPDC} 2014},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2014},
  month     = jun,
  pages     = {33--40},
  doi       = {10.1145/2608029.2608034},
  isbn      = {9781450329118},
  keywords  = {Cloud, Distributed computing, Storage systems},
  language  = {English (US)},
  note      = {5th ACM Workshop on Scientific Cloud Computing, ScienceCloud 2014 ; Conference date: 23-06-2014 Through 27-06-2014},
  abstract  = {Infrastructure-as-a-Service (IaaS) clouds are an appealing resource for scientific computing. However, the bare-bones presentation of raw Linux virtual machines leaves much to the application developer. For many cloud applications, effective data handling is critical to efficient application execution. This paper investigates the capabilities of a variety of POSIX-accessible distributed storage systems to manage data access patterns resulting from workflow application executions in the cloud. We leverage the expressivity of the Swift parallel scripting framework to benchmark the performance of a number of storage systems using synthetic workloads and three real-world applications. We characterize two representative commercial storage systems (Amazon S3 and HDFS, respectively) and two emerging research-based storage systems (Chirp/Parrot and MosaStore). We find the use of aggregated node-local resources effective and economical compared with remotely located S3 storage. Our experiments show that applications run at scale with MosaStore show up to 30\% improvement in makespan time compared with those run with S3. We also find that storage-system driven application deployments in the cloud results in better runtime performance compared with an on-demand datastaging driven approach.},
}