Baechi: Fast device placement of machine learning graphs

Beomyeol Jeon, Linda Cai, Pallavi Srivastava, Jintao Jiang, Xiaolan Ke, Yitao Meng, Cong Xie, Indranil Gupta

Research output: Chapter in Book/Report/Conference proceeding › Conference contribution


Machine learning graphs (or models) can be challenging or impossible to train when devices have limited memory or the models are large. Today, splitting the model graph across multiple devices largely relies on learning-based approaches to generate this placement. While the resulting placements train fast on data (i.e., with low step times), learning-based model parallelism is itself time-consuming, taking many hours or days to create a placement plan of operators on devices. We present the Baechi system, where we adopt an algorithmic approach to the placement problem for running machine learning training graphs on a small cluster of memory-constrained devices. We implemented Baechi so that it works modularly with TensorFlow. Our experimental results using GPUs show that Baechi generates placement plans 654×–206K× faster than today's learning-based approaches, and the placed model's step time is at most 6.2% higher than expert-based placements.
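To make the placement problem concrete, here is a minimal sketch of a greedy, memory-constrained placement of a model graph's operators onto devices. This is a hypothetical illustration of the general problem setting, not Baechi's actual algorithm; the function name, operator list, and memory figures are all assumptions for the example.

```python
def greedy_placement(ops, device_memory):
    """Illustrative greedy placer (not Baechi's algorithm).

    ops: list of (op_name, mem_bytes) pairs in topological order.
    device_memory: dict mapping device_id -> free memory in bytes.
    Returns a dict op_name -> device_id; raises MemoryError if an
    operator fits on no device.
    """
    free = dict(device_memory)
    placement = {}
    for name, mem in ops:
        # Consider only devices with enough free memory for this operator.
        candidates = [d for d, f in free.items() if f >= mem]
        if not candidates:
            raise MemoryError(f"operator {name} ({mem} B) fits on no device")
        # Place on the device with the most remaining memory (load balancing).
        dev = max(candidates, key=lambda d: free[d])
        free[dev] -= mem
        placement[name] = dev
    return placement

# Hypothetical three-operator graph on two small GPUs.
ops = [("conv1", 4_000), ("conv2", 3_000), ("fc", 2_000)]
plan = greedy_placement(ops, {"gpu:0": 5_000, "gpu:1": 5_000})
```

A real placer must also account for inter-device communication along graph edges, which this sketch ignores; that tension between memory limits and step time is exactly what the paper's algorithmic approach addresses.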

Original language: English (US)
Title of host publication: SoCC 2020 - Proceedings of the 2020 ACM Symposium on Cloud Computing
Publisher: Association for Computing Machinery
Number of pages: 15
ISBN (Electronic): 9781450381376
State: Published - Oct 12, 2020
Event: 11th ACM Symposium on Cloud Computing, SoCC 2020 - Virtual, Online, United States
Duration: Oct 19, 2020 - Oct 21, 2020

Publication series

Name: SoCC 2020 - Proceedings of the 2020 ACM Symposium on Cloud Computing


Conference: 11th ACM Symposium on Cloud Computing, SoCC 2020
Country/Territory: United States
City: Virtual, Online


Keywords

  • TensorFlow
  • constrained memory
  • distributed systems
  • machine learning systems
  • placement algorithms

ASJC Scopus subject areas

  • Information Systems
  • Software
  • Artificial Intelligence
