@article{3261e250082f4b3f825adbac2c40c851,
title = "Analytical convergence regions of accelerated gradient descent in nonconvex optimization under Regularity Condition",
abstract = "There is a growing interest in using robust control theory to analyze and design optimization and machine learning algorithms. This paper studies a class of nonconvex optimization problems whose cost functions satisfy the so-called Regularity Condition (RC). Empirical studies show that accelerated gradient descent (AGD) algorithms (e.g. Nesterov's acceleration and Heavy-ball) with proper initializations often work well in practice. However, the convergence of such AGD algorithms is largely unknown in the literature. The main contribution of this paper is the analytical characterization of the convergence regions of AGD under RC via robust control tools. Since such optimization problems arise frequently in many applications such as phase retrieval, training of neural networks and matrix sensing, our result shows promise of robust control theory in these areas.",
keywords = "Accelerated gradient descent, Nonconvex optimization, Regularity condition, Robust control",
author = "Huaqing Xiong and Yuejie Chi and Bin Hu and Wei Zhang",
note = "Funding Information: The work of H. Xiong and W. Zhang is partly supported by the National Science Foundation, USA under grant CNS-1552838.The work of Y. Chi is supported in part by ONR, USA under grant N00014-18-1-2142, by ARO, USA under grant W911NF-18-1-0303, and by NSF, USA under grants CCF-1806154 and ECCS-1818571. Funding Information: Theorem 15 Since z − 1 , z 0 ∈ N ϵ ∕ 10 cond ( P ) ( x ⋆ ) , we have ‖ ϕ 0 − ϕ ∗ ‖ < ϵ 5 cond ( P ) . The exponential decay with P ≻ 0 : ( ϕ k + 1 − ϕ ∗ ) T P ( ϕ k + 1 − ϕ ∗ ) ≤ ρ 2 ( ϕ k − ϕ ∗ ) T P ( ϕ k − ϕ ∗ ) implies that ‖ ϕ k − ϕ ∗ ‖ ≤ cond ( P ) ρ k ‖ ϕ 0 − ϕ ∗ ‖ , which we have argued in Section 3.2 . Therefore, ‖ ϕ k − ϕ ∗ ‖ ≤ cond ( P ) ρ k ‖ ϕ 0 − ϕ ∗ ‖ < cond ( P ) ⋅ ‖ ϕ 0 − ϕ ∗ ‖ < cond ( P ) ⋅ ϵ 5 cond ( P ) < ϵ ∕ 5 . As a consequence, ‖ y k − x ⋆ ‖ = ‖ ( 1 + β 2 ) z k ( 1 ) − β 2 z k ( 2 ) − x ⋆ ‖ = ‖ C ( ϕ k − ϕ ∗ ) ‖ ≤ ‖ C T ‖ ‖ ϕ k − ϕ ∗ ‖ < ( 1 + β 2 ) 2 + β 2 2 ⋅ ϵ ∕ 5 < ϵ , where we recall C = [ 1 + β 2 , − β 2 ] , and the last line used β 2 < 1 . Huaqing Xiong received his B.E. in Control Science and Engineering from Zhejiang University in 2016, and is pursuing a Ph.D. degree in Electrical and Computer Engineering at the Ohio State University, where his research focuses on deep learning theory, reinforcement learning theory and nonconvex optimization. Yuejie Chi received the Ph.D. degree in Electrical Engineering from Princeton University in 2012, and the B.E. (Hon.) degree in Electrical Engineering from Tsinghua University, Beijing, China, in 2007. She was with The Ohio State University from 2012 to 2017. Since 2018, she is an Associate Professor with the department of Electrical and Computer Engineering at Carnegie Mellon University, where she holds the Robert E. Doherty Early Career Professorship. Her research interests include signal processing, statistical inference, machine learning, large-scale optimization, and their applications in data science, inverse problems, imaging, and sensing systems. She is a recipient of the PECASE Award, NSF CAREER Award, AFOSR and ONR Young Investigator Program Awards, Ralph E. Powe Junior Faculty Enhancement Award, Google Faculty Research Award, IEEE Signal Processing Society Young Author Best Paper Award and the Best Paper Award at the IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP). She has served as an Elected Member of the SPTM, SAM and MLSP Technical Committees of the IEEE Signal Processing Society. She currently serves as an Associate Editor of IEEE Trans. on Signal Processing. Bin Hu is currently an assistant professor in the Department of Electrical and Computer Engineering at the University of Illinois at Urbana-Champaign and affiliated with the Coordinated Science Laboratory. He received the Ph.D. degree in Aerospace Engineering and Mechanics at the University of Minnesota in 2016. Between July 2016 and July 2018, he was a postdoctoral researcher in the Wisconsin Institute for Discovery at the University of Wisconsin-Madison. His current research focuses on the interplay between machine learning and control. Wei Zhang is currently a Professor in the Department of Mechanical and Energy Engineering at the Southern University of Science and Technology (SUSTech), Shenzhen, China. He received the B.S. in Automatic Control from the University of Science and Technology of China in 2003, the M.S. in Electrical and Computer Engineering from the University of Kentucky in 2005, and the M.S. in Statistics and the Ph.D. in Electrical Engineering both from Purdue University in 2009. From January 2010 to August 2011, he was a Postdoctoral Researcher in the EECS Department at the University of California, Berkeley. Before joining SUSTech, he served as an Assistant Professor and then an Associate Professor (with tenure) of Electrical and Computer Engineering at the Ohio State University. His research interests include hybrid systems, control and learning theory, game theory, stochastic systems, and their applications in robotics, power systems, and intelligent transportations. He is a recipient of the NSF CAREER award and the Lumley Research Award at the Ohio State University. Publisher Copyright: {\textcopyright} 2019 Elsevier Ltd",
year = "2020",
month = mar,
doi = "10.1016/j.automatica.2019.108715",
language = "English (US)",
volume = "113",
journal = "Automatica",
issn = "0005-1098",
publisher = "Elsevier Limited",
}