% Workshop paper in the DIMES 2024 proceedings (co-located with SOSP 2024; see the note field).
% Acronyms in title/booktitle are brace-protected so sentence-casing styles keep their capitals.
@inproceedings{68af375447c3469fbe6bade7e436f700,
  author    = {Zhu, Zhiting and Ni, Newton and Huang, Yibo and Sun, Yan and Jia, Zhipeng and Kim, {Nam Sung} and Witchel, Emmett},
  title     = {{Lupin}: Tolerating Partial Failures in a {CXL} Pod},
  booktitle = {{DIMES} 2024 - Proceedings of the 2nd Workshop on Disruptive Memory Systems, Part of: {SOSP} 2024},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  pages     = {41--50},
  year      = {2024},
  month     = nov,
  day       = {3},
  doi       = {10.1145/3698783.3699377},
  language  = {English (US)},
  keywords  = {CXL, Partial failure tolerance},
  abstract  = {A compute express link (CXL) pod is a collection of hosts attached to a CXL memory module. It provides an opportunity to port single-host shared-memory programs to execute on multiple hosts in a CXL pod, where the ported application achieves higher performance than a distributed application that uses network for coordination. The cost of performance scaling on a CXL pod is that applications should tolerate partial failures, where one process or operating system fails or reboots. Lupin is system software that includes kernel modifications and user-level libraries to help applications remain available while they recover from partial failures using the contents of CXL memory.},
  note      = {Publisher Copyright: {\textcopyright} 2024 ACM.; 2nd Workshop on Disruptive Memory Systems, DIMES 2024, co-located with the 30th ACM Symposium on Operating Systems Principles, SOSP 2024 ; Conference date: 03-11-2024},
}