% Coach (ASPLOS 2025) conference paper. Notes for maintainers:
% - "acknowledgement" is a non-standard field that bibliography styles ignore;
%   it preserves the acknowledgment text that the export had placed in "note",
%   where it would have been printed as part of the reference.
% - "eventtitle"/"eventdate" carry the conference metadata formerly in "note";
%   they are biblatex fields and are ignored by classic BibTeX styles.
% - "address" is the publisher's location, not the conference venue.
@inproceedings{569cc0f3688e444a8dc42cb0f6134c7b,
  author          = {Reidys, Benjamin and Zardoshti, Pantea and Goiri, {\'I}{\~n}igo and Irvene, Celine and Berger, Daniel S. and Ma, Haoran and Arya, Kapil and Cortez, Eli and Stark, Taylor and Bak, Eugene and Iyigun, Mehmet and Novakovic, Stanko and Hsu, Lisa and Trueba, Karel and Pan, Abhisek and Bansal, Chetan and Rajmohan, Saravan and Huang, Jian and Bianchini, Ricardo},
  title           = {{Coach}: Exploiting Temporal Patterns for All-Resource Oversubscription in Cloud Platforms},
  booktitle       = {{ASPLOS} 2025 - Proceedings of the 30th {ACM} International Conference on Architectural Support for Programming Languages and Operating Systems},
  series          = {International Conference on Architectural Support for Programming Languages and Operating Systems - ASPLOS},
  pages           = {164--181},
  publisher       = {Association for Computing Machinery},
  address         = {New York, NY, USA},
  year            = {2025},
  month           = mar,
  day             = {30},
  eventtitle      = {30th {ACM} International Conference on Architectural Support for Programming Languages and Operating Systems, {ASPLOS} 2025},
  eventdate       = {2025-03-30/2025-04-03},
  doi             = {10.1145/3669940.3707226},
  language        = {English (US)},
  keywords        = {cloud computing, memory oversubscription, resource management, temporal patterns},
  abstract        = {Cloud platforms remain underutilized despite multiple proposals to improve their utilization (e.g., disaggregation, harvesting, and oversubscription). Our characterization of the resource utilization of virtual machines (VMs) in Azure reveals that, while CPU is the main underutilized resource, we need to provide a solution to manage all resources holistically. We also observe that many VMs exhibit complementary temporal patterns, which can be leveraged to improve the oversubscription of underutilized resources. Based on these insights, we propose Coach: a system that exploits temporal patterns for all-resource oversubscription in cloud platforms. Coach uses long-term predictions and an efficient VM scheduling policy to exploit temporally complementary patterns. We introduce a new general-purpose VM type, called CoachVM, where we partition each resource allocation into a guaranteed and an oversubscribed portion. Coach monitors the oversubscribed resources to detect contention and mitigate any potential performance degradation. We focus on memory management, which is particularly challenging due to memory's sensitivity to contention and the overhead required to reassign it between CoachVMs. Our experiments show that Coach enables platforms to host up to {$\sim$}26\% more VMs with minimal performance degradation.},
  acknowledgement = {We thank the anonymous reviewers and our shepherd, Michael Swift, for their valuable feedback and constructive suggestions that helped improve this paper. We thank Arup Roy, Patrick Payne, Milos Kralj, and the entire Core OS team at Microsoft Azure for their help. Benjamin Reidys and Jian Huang were partially supported by NSF grant CCF-1919044 and NSF CAREER Award CNS-2144796.},
}