% Conference paper: "Acto" (automatic end-to-end testing of cloud system
% operators), published in the SOSP 2023 proceedings, pages 96--112.
% The citation key is an opaque export identifier; documents cite this
% entry by it, so it must not be renamed.
% Fields such as abstract, keywords, day and language are not part of the
% classic BibTeX field set; styles that do not know a field skip it.
% The address field holds the publisher's city, not the conference venue.
@inproceedings{d02394346820401b89e86a8cac559d9a,
  author    = {Gu, {Jiawei Tyler} and Sun, Xudong and Zhang, Wentao and Jiang, Yuxuan and Wang, Chen and Vaziri, Mandana and Legunsen, Owolabi and Xu, Tianyin},
  title     = {{Acto}: Automatic End-to-End Testing for Operation Correctness of Cloud System Management},
  booktitle = {{SOSP} 2023 -- Proceedings of the 29th {ACM} Symposium on Operating Systems Principles},
  series    = {{SOSP} '23},
  pages     = {96--112},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  year      = {2023},
  month     = oct,
  day       = {23},
  doi       = {10.1145/3600006.3613161},
  language  = {English (US)},
  keywords  = {cloud, kubernetes, operation, operation correctness, operator, reliability, system management},
  abstract  = {Cloud systems are increasingly being managed by operation programs termed operators, which automate tedious, human-based operations. Operators of modern management platforms like Kubernetes, Twine, and ECS implement declarative interfaces based on the state-reconciliation principle. An operation declares a desired system state and the operator automatically reconciles the system to that declared state. Operator correctness is critical, given the impacts on system operations---bugs in operator code put systems in undesired or error states, with severe consequences. However, validating operator correctness is challenging due to the enormous system-state space and complex operation interface. A correct operator must not only satisfy correctness properties of its own code, but it must also maintain managed systems in desired states. Unfortunately, end-to-end testing of operators significantly falls short. We present Acto, the first automatic end-to-end testing technique for cloud system operators. Acto uses a state-centric approach to test an operator together with a managed system. Acto continuously instructs an operator to reconcile a system to different states and checks if the system successfully reaches those desired states. Acto models operations as state transitions and systematically realizes state-transition sequences to exercise supported operations in different scenarios. Acto's oracles automatically check whether a system's state is as desired. To date, Acto has helped find 56 serious new bugs (42 were confirmed and 30 have been fixed) in eleven Kubernetes operators with few false alarms.},
  note      = {Publisher Copyright: {\textcopyright} 2023 ACM. 29th ACM Symposium on Operating Systems Principles, SOSP 2023. Conference date: 23-10-2023 through 26-10-2023},
}